In [ ]:
import os
os.getcwd()
print(os.getcwd())
## 获取当前工作目录
import pandas as pd
from pandas import DataFrame #数据框的简写,本书采用的是该格式,直接使用DataFrame即可
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
plt.rcParams['font.sans-serif']=['SimHei']
plt.rcParams['axes.unicode_minus'] = False
#pd.set_option('display.max_rows', None) #显示所有数据行
pd.set_option('display.max_rows',10) #共显示10行数据
pd.set_option('display.max_columns',None)# 设置查看列不省略
pd.set_option('display.precision',2)#设置后四位
pd.options.display.float_format='{:.2f}'.format # 全局设置二位小数
- demo1——标普500数据主成分降维后,主成分回归分析
数据清洗,缺失值使用后一日的收盘价填充,整列缺失值或大量缺失值则删除。
In [ ]:
#爬取标普500数据,12-2至12-10分钟级收盘价数据,创建demo数据库
import datetime as dt
import yfinance as yf
def get_data_tocsv(url=r'.\sp500tickers.xlsx'):
msft = yf.Ticker('SPY')
hist = msft.history(start="2024-12-2", end="2024-12-10", interval="1m")
lie_all=hist['Close'].rename('SPY',inplace=True)
df = pd.read_excel(url)
for i in df.iloc[:,0]:
msft=yf.Ticker(i)
hist=msft.history(start="2024-12-2", end="2024-12-10", interval="1m")
lie=hist['Close'].rename(i,inplace=True)
lie_all=pd.concat([lie_all,lie],axis=1)
lie_all.to_csv('sp500_data.csv')
return lie_all
get_data_tocsv()
In [3]:
from sklearn.preprocessing import StandardScaler
data_origin=pd.read_csv(r'.\sp500_data.csv')
data_origin.rename(columns={data_origin.columns[0]: 'time'}, inplace=True)
data_origin.set_index(data_origin.columns[0], inplace=True)#设置时间为索引
data_origin.info()
#需要剔除的列 AZO,BKNG,NVR,POOL,BF.B,TPL,AIZ,BRK.B,CINF,ERIE,FDS,EG,MTD,BR,HUBB,JBHT,NDSN,SNA,GWW,FICO,TYL
need_to_drop=['AZO','BKNG','NVR','POOL','BF.B','TPL','AIZ','BRK.B','CINF','ERIE','FDS','EG','MTD','BR','HUBB','JBHT','NDSN','SNA','GWW','FICO','TYL']
data=data_origin.drop(need_to_drop,axis=1).bfill()#删除列和填充缺失值
missing_values = data.isnull().sum().sum()#查看是否还有缺失值
print(missing_values)
scaler = StandardScaler()#标准化
data = pd.DataFrame(scaler.fit_transform(data), columns=data.columns)
data.head()#查看数据
y_data = data['SPY'] #分开自变量和因变量
x_data = data.drop('SPY', axis=1)
x_data.shape
<class 'pandas.core.frame.DataFrame'> Index: 2340 entries, 2024-12-02 09:30:00-05:00 to 2024-12-09 15:59:00-05:00 Columns: 504 entries, SPY to XEL dtypes: float64(504) memory usage: 9.0+ MB 0
Out[3]:
| SPY | GOOGL | GOOG | T | CHTR | CMCSA | EA | FOXA | FOX | IPG | LYV | MTCH | META | NFLX | NWSA | NWS | OMC | PARA | TMUS | TTWO | VZ | DIS | WBD | ABNB | AMZN | APTV | BBY | BWA | CZR | KMX | CCL | CMG | DHI | DRI | DECK | DPZ | EBAY | EXPE | F | GRMN | GM | GPC | HAS | HLT | HD | LVS | LEN | LKQ | LOW | LULU | MAR | MCD | MGM | MHK | NKE | NCLH | ORLY | PHM | RL | ROST | RCL | SBUX | TPR | TSLA | TJX | TSCO | ULTA | WYNN | YUM | MO | ADM | BG | CPB | CHD | CLX | KO | CL | CAG | STZ | COST | DG | DLTR | EL | GIS | HSY | HRL | SJM | K | KVUE | KDP | KMB | KHC | KR | LW | MKC | TAP | MDLZ | MNST | PEP | PM | PG | SYY | TGT | TSN | WBA | WMT | APA | BKR | CVX | COP | CTRA | DVN | FANG | EOG | EQT | XOM | HAL | HES | KMI | MPC | OXY | OKE | PSX | SLB | TRGP | VLO | WMB | AFL | ALL | AXP | AIG | AMP | AON | ACGL | AJG | BAC | BLK | BX | BK | BRO | COF | CBOE | SCHW | CB | C | CFG | CME | CPAY | DFS | FIS | FITB | FI | BEN | GPN | GL | GS | HIG | HBAN | ICE | IVZ | JKHY | JPM | KEY | KKR | L | MTB | MKTX | MMC | MA | MET | MCO | MS | MSCI | NDAQ | NTRS | PYPL | PNC | PFG | PGR | PRU | RJF | RF | SPGI | STT | SYF | TROW | TRV | TFC | USB | V | WRB | WFC | WTW | ABT | ABBV | A | ALGN | AMGN | BAX | BDX | TECH | BIIB | BSX | BMY | CAH | CTLT | COR | CNC | CRL | CI | COO | CVS | DHR | DVA | DXCM | EW | ELV | GEHC | GILD | HCA | HSIC | HOLX | HUM | IDXX | INCY | PODD | ISRG | IQV | JNJ | LH | LLY | MCK | MDT | MRK | MRNA | MOH | PFE | DGX | REGN | RMD | RVTY | SOLV | STE | SYK | TFX | TMO | UNH | UHS | VRTX | VTRS | WAT | WST | ZBH | ZTS | MMM | AOS | ALLE | AMTM | AME | ADP | AXON | BA | BLDR | CHRW | CARR | CAT | CTAS | CPRT | CSX | CMI | DAY | DE | DAL | DOV | ETN | EMR | EFX | EXPD | FAST | FDX | FTV | GE | GEV | GNRC | GD | HON | HWM | HII | IEX | ITW | IR | J | JCI | LHX | LDOS | LMT | MAS | NSC | NOC | ODFL | OTIS | PCAR | PH | PAYX | PAYC | PNR | PWR | RSG | ROK | ROL | RTX | LUV | SWK | TXT | TT | TDG | UBER | UNP | UAL | UPS | URI | VLTO | VRSK | WAB | WM | XYL | ACN | ADBE | AMD | AKAM | APH | ADI | ANSS | AAPL | AMAT | ANET | ADSK | AVGO | CDNS | CDW | CSCO | CTSH | GLW | CRWD | DELL | ENPH | EPAM | FFIV | FSLR | FTNT | IT | GEN | GDDY | HPE | HPQ | IBM | INTC | INTU | JBL | JNPR | KEYS | KLAC | LRCX | MCHP | MU | MSFT | MPWR | MSI | NTAP | NVDA | NXPI | ON | ORCL | PLTR | PANW | PTC | QRVO | QCOM | ROP | CRM | STX | NOW | SWKS | SMCI | SNPS | TEL | TDY | TER | TXN | TRMB | VRSN | WDC | ZBRA | APD | ALB | AMCR | AVY | BALL | CE | CF | CTVA | DOW | DD | EMN | ECL | FMC | FCX | IFF | IP | LIN | LYB | MLM | MOS | NEM | NUE | PKG | PPG | SHW | SW | STLD | VMC | ARE | AMT | AVB | BXP | CPT | CBRE | CSGP | CCI | DLR | EQIX | EQR | ESS | EXR | FRT | DOC | HST | INVH | IRM | KIM | MAA | PLD | PSA | O | REG | SBAC | SPG | UDR | VTR | VICI | WELL | WY | AES | LNT | AEE | AEP | AWK | ATO | CNP | CMS | ED | CEG | D | DTE | DUK | EIX | ETR | EVRG | ES | EXC | FE | NEE | NI | NRG | PCG | PNW | PPL | PEG | SRE | SO | VST | WEC | XEL | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | -1.22 | -2.68 | -2.64 | -1.22 | -0.07 | 0.53 | -2.11 | 2.24 | 2.24 | 0.80 | 1.09 | -1.10 | -3.01 | -2.19 | -1.40 | -2.09 | 0.78 | -0.81 | 1.11 | -0.43 | 1.90 | 2.33 | -1.53 | -0.91 | -1.37 | -1.39 | 0.74 | 0.19 | 0.79 | -0.53 | -1.59 | -1.00 | 1.79 | 2.78 | -1.57 | 3.02 | -0.51 | -1.35 | 2.50 | -0.77 | 2.20 | 0.89 | -0.39 | 0.11 | 0.42 | -1.26 | 1.65 | -0.41 | 0.39 | -1.15 | 0.01 | -0.92 | 1.81 | 2.07 | -0.52 | 0.06 | -0.45 | 1.56 | -0.95 | -0.76 | -0.47 | 1.64 | -1.09 | -0.82 | 0.64 | -0.55 | -1.24 | -0.09 | 0.45 | 1.70 | 1.66 | 1.52 | 1.42 | 1.83 | -0.49 | 1.85 | 1.53 | -0.97 | 0.67 | -1.92 | -1.64 | -1.58 | -1.97 | 0.06 | -0.56 | 0.07 | 0.60 | 1.61 | 1.72 | -0.20 | 1.60 | 1.25 | 1.19 | -1.39 | 0.28 | 1.12 | 1.25 | 1.33 | 1.89 | 1.47 | 1.53 | -1.74 | 0.04 | 1.24 | 0.82 | -1.22 | 1.45 | 1.91 | 1.63 | 2.97 | 2.60 | 1.76 | 1.68 | 2.54 | 1.65 | 1.56 | 1.52 | 1.41 | 2.22 | 1.19 | 1.56 | 2.53 | 0.97 | 1.47 | 2.90 | 1.33 | 2.89 | 3.17 | 2.56 | 2.33 | 2.47 | 2.74 | 1.99 | 2.01 | 1.69 | 2.79 | -0.52 | 4.31 | 1.87 | 1.42 | 4.95 | 1.29 | 2.55 | 1.38 | -1.06 | 3.28 | -0.45 | 1.72 | 4.92 | 1.30 | 2.32 | 1.80 | 0.95 | 1.33 | 2.11 | 3.12 | 1.87 | 2.66 | 4.34 | 0.66 | 2.17 | 3.32 | 3.66 | 2.48 | 1.98 | 2.55 | 1.19 | 1.26 | 2.37 | 2.29 | 2.37 | 2.24 | -1.13 | 2.87 | 2.32 | -0.46 | 3.04 | 2.21 | 1.66 | 2.17 | 2.50 | 3.00 | 0.97 | 0.74 | 0.93 | -0.56 | 1.69 | 2.65 | 3.09 | 1.96 | 2.54 | 3.58 | -0.52 | 2.98 | 1.43 | -0.94 | -0.64 | 1.96 | 1.29 | 0.03 | 0.24 | -0.07 | 0.62 | 0.67 | -0.17 | -2.96 | 1.09 | 1.40 | 0.47 | 1.19 | 0.88 | 1.30 | 0.90 | 2.06 | -1.11 | -1.14 | 0.97 | 0.77 | -0.06 | 0.75 | 1.61 | 1.98 | 1.78 | -1.15 | 0.38 | -0.14 | -0.04 | -0.60 | 1.61 | 1.13 | -1.31 | 1.26 | 1.32 | -0.71 | -0.16 | -0.80 | 0.54 | 1.39 | -1.04 | 1.80 | -0.50 | 1.11 | 1.43 | 1.66 | 1.42 | 0.10 | 0.96 | 1.55 | 0.72 | 1.17 | -0.73 | 2.36 | 1.43 | -0.95 | 1.03 | 1.72 | 1.61 | 1.36 | 0.68 | 1.38 | -0.87 | -0.23 | 1.59 | -0.77 | 2.32 | 2.91 | 0.88 | 2.22 | 1.62 | -0.96 | -0.10 | 1.45 | -0.04 | 2.81 | 1.63 | 0.40 | 0.07 | 0.67 | 2.04 | 2.92 | 1.49 | 0.99 | -0.26 | 1.93 | 2.95 | 3.02 | 0.12 | 2.67 | 0.84 | 2.36 | 1.04 | 1.99 | 0.91 | 1.83 | 1.36 | 2.64 | -0.94 | 1.87 | 1.58 | 1.67 | 2.41 | 0.03 | 2.03 | 1.18 | -0.25 | 1.90 | 1.67 | 2.24 | -0.31 | 2.09 | 2.77 | -1.95 | 1.72 | 1.34 | 0.68 | -0.53 | 0.78 | 3.07 | -0.67 | 2.78 | 0.65 | 2.31 | 1.60 | -0.62 | 1.72 | -0.75 | 0.88 | -1.36 | -0.52 | -3.58 | -1.88 | -0.29 | 0.25 | -1.96 | -0.36 | -1.68 | -2.10 | -1.64 | -1.14 | -2.14 | -0.59 | -0.94 | -0.90 | -1.98 | 0.56 | -0.63 | 0.20 | -1.60 | 0.76 | -0.87 | 0.19 | -1.09 | -0.92 | -1.03 | -2.12 | -1.24 | 2.09 | -1.14 | 1.21 | -0.49 | -2.05 | -1.03 | -2.52 | 1.23 | -2.43 | -2.59 | -1.62 | 0.85 | -1.79 | -1.07 | 0.81 | 0.95 | -0.47 | -1.36 | -1.43 | -1.95 | -0.32 | -1.42 | 0.62 | -1.22 | 1.63 | -1.36 | -0.34 | -2.51 | 0.35 | -0.43 | 2.60 | -1.96 | 0.93 | -2.11 | -0.84 | 1.90 | 0.33 | 1.53 | 0.35 | 1.69 | 1.57 | 0.89 | 0.79 | -0.35 | 1.24 | 1.39 | 1.12 | 2.44 | 0.21 | -0.00 | 1.28 | 2.69 | 1.26 | 1.50 | 1.63 | 2.61 | 0.01 | 0.32 | 1.36 | 1.73 | -1.17 | 1.69 | 1.79 | 1.58 | 1.08 | 1.70 | 0.38 | 2.61 | 1.68 | 2.94 | 2.40 | 1.38 | 1.55 | 2.52 | 0.51 | 2.76 | 3.07 | 2.25 | 4.89 | 2.42 | -0.83 | 2.50 | 1.54 | 2.75 | 1.95 | 1.20 | 2.30 | 4.29 | 3.54 | 1.18 | 1.66 | 3.32 | 2.51 | 3.35 | 2.90 | 1.47 | -1.14 | 2.54 | 3.33 | 2.86 | 2.03 | 2.27 | 1.92 | 2.53 | 2.67 | 1.40 | 2.76 | 2.64 | 2.62 | 3.21 | 2.45 | 3.64 | 2.27 | 4.48 | 2.91 | 3.28 | 3.37 | 1.29 | 0.35 | 3.23 | 2.01 | 2.54 | 2.25 | 2.51 | 1.27 | 2.66 | 2.16 |
| 1 | -1.31 | -2.26 | -2.25 | -1.80 | -0.23 | 0.51 | -2.41 | 1.79 | 1.87 | 0.59 | 1.35 | -0.59 | -2.71 | -2.08 | -1.18 | -1.94 | 0.76 | -0.67 | 1.03 | -0.97 | 1.57 | 2.75 | -1.21 | -0.52 | -1.23 | -1.24 | 0.23 | 0.15 | 0.45 | -0.78 | -0.93 | -1.00 | 1.63 | 2.60 | -2.02 | 2.23 | -0.84 | -1.20 | 2.40 | -0.75 | 2.14 | 0.86 | -0.44 | 0.23 | -0.43 | -0.66 | 1.46 | -0.52 | -0.39 | -1.00 | 0.05 | -1.01 | 1.48 | 2.33 | -0.85 | 0.69 | -0.45 | 1.52 | -1.04 | -0.72 | -0.33 | 1.63 | -1.15 | -0.73 | 0.40 | -1.16 | -1.41 | -0.20 | -0.54 | 1.32 | 1.75 | 1.43 | 1.52 | 1.25 | -0.75 | 1.96 | 1.29 | -0.69 | 0.75 | -2.03 | -1.66 | -1.63 | -2.01 | 0.22 | -0.54 | 0.06 | 0.43 | 0.95 | 1.48 | -0.27 | 1.39 | 0.99 | 1.07 | -1.49 | 0.03 | 1.14 | 1.29 | 1.60 | 1.80 | 1.27 | 1.29 | -1.64 | -0.13 | 0.95 | 1.35 | -1.16 | 1.47 | 1.66 | 1.58 | 2.78 | 2.53 | 1.86 | 1.90 | 2.44 | 1.33 | 1.62 | 1.53 | 1.47 | 1.93 | 1.26 | 1.72 | 2.44 | 0.86 | 1.46 | 2.78 | 1.21 | 2.65 | 3.11 | 1.99 | 2.44 | 2.30 | 2.10 | 1.59 | 1.83 | 1.47 | 2.56 | -0.70 | 3.46 | 1.77 | 1.45 | 4.39 | 1.11 | 2.36 | 1.22 | -1.29 | 3.03 | -0.75 | 1.64 | 4.57 | 0.85 | 2.32 | 1.74 | 1.08 | 1.32 | 1.75 | 3.03 | 1.68 | 2.45 | 4.38 | 0.46 | 1.98 | 2.74 | 3.47 | 2.48 | 1.92 | 2.35 | 1.22 | 1.12 | 1.62 | 2.21 | 1.89 | 2.49 | -2.04 | 2.52 | 1.99 | -0.43 | 2.61 | 2.46 | 1.50 | 2.08 | 2.47 | 2.59 | 0.73 | 0.65 | 0.61 | -0.91 | 1.51 | 2.13 | 2.72 | 1.39 | 2.03 | 2.90 | -0.65 | 2.84 | 1.32 | -1.21 | -1.12 | 1.79 | 1.27 | -0.08 | -0.38 | 0.26 | 0.77 | 0.54 | -0.44 | -2.54 | 0.93 | 1.41 | 0.67 | 1.18 | 0.84 | 1.24 | 0.94 | 1.79 | -1.09 | -1.15 | 1.00 | 0.68 | 0.10 | 0.70 | 1.61 | 1.88 | 1.79 | -1.36 | 0.37 | -0.17 | -0.11 | -0.09 | 1.60 | 1.19 | -1.50 | 1.14 | 1.42 | -0.96 | -0.09 | -1.05 | 0.50 | 1.24 | -0.90 | 1.73 | -0.62 | 1.07 | 1.32 | 1.67 | 1.18 | 0.30 | 0.94 | 1.48 | 0.60 | 1.11 | -1.10 | 1.57 | 0.98 | -1.07 | 0.97 | 1.45 | 0.68 | 0.98 | 1.00 | 1.40 | -0.97 | -0.10 | 1.63 | -0.99 | 2.22 | 2.64 | 0.80 | 1.89 | 1.63 | -0.76 | 0.25 | 1.40 | -0.02 | 2.57 | 1.31 | 0.47 | -0.11 | 0.07 | 1.70 | 2.79 | 2.91 | 0.67 | -0.19 | 1.82 | 2.56 | 2.90 | 0.07 | 2.46 | 0.52 | 2.61 | 1.16 | 1.91 | 0.72 | 1.70 | 1.35 | 2.29 | 0.27 | 1.75 | 0.98 | 1.63 | 2.15 | -0.03 | 1.86 | 1.40 | -0.27 | 1.11 | 1.57 | 2.05 | -0.17 | 1.95 | 2.51 | -1.79 | 1.75 | 1.51 | 0.64 | -0.97 | 0.81 | 3.07 | -0.71 | 2.59 | 0.49 | 1.99 | 1.21 | -0.61 | 1.68 | -0.87 | 0.84 | -1.36 | -0.36 | -2.88 | -1.71 | -0.25 | 0.20 | -1.81 | -0.26 | -1.89 | -2.45 | -1.76 | -1.13 | -2.04 | -0.77 | -0.60 | -0.54 | -2.26 | 0.19 | -0.13 | -0.00 | -1.60 | 1.32 | -1.13 | 0.72 | -1.22 | -1.08 | -1.02 | -3.05 | -1.27 | 2.15 | -1.32 | 2.57 | -0.46 | -1.78 | -0.84 | -2.04 | 1.27 | -2.24 | -2.45 | -1.47 | 0.51 | -1.89 | -1.03 | 0.87 | 1.06 | -1.52 | -1.50 | -1.52 | -2.49 | -0.05 | -1.16 | 0.42 | -1.23 | 1.54 | -1.40 | -0.16 | -3.10 | 0.46 | -0.58 | 1.27 | -1.83 | 1.17 | -2.22 | -0.92 | 1.55 | -0.07 | 1.45 | 0.54 | 1.59 | 1.46 | 0.88 | 0.66 | -0.17 | 1.25 | 1.34 | 0.84 | 2.09 | 0.27 | -0.00 | 1.18 | 2.68 | 1.06 | 1.32 | 1.61 | 2.51 | 0.00 | 0.46 | 1.36 | 1.94 | -1.24 | 1.59 | 1.61 | 1.44 | 1.08 | 1.62 | -0.25 | 2.49 | 1.69 | 2.82 | 1.65 | 1.23 | 1.45 | 1.92 | -0.33 | 2.71 | 2.93 | 1.93 | 4.42 | 1.96 | -1.11 | 2.31 | 1.01 | 2.15 | 1.53 | 0.90 | 1.92 | 3.45 | 2.97 | 1.18 | 1.51 | 3.00 | 2.16 | 3.12 | 2.51 | 1.62 | -1.51 | 2.05 | 3.04 | 2.51 | 1.89 | 2.14 | 1.60 | 2.21 | 2.36 | 0.81 | 2.61 | 2.55 | 2.34 | 2.81 | 2.16 | 3.28 | 2.12 | 4.18 | 2.63 | 2.90 | 2.89 | 0.94 | 0.24 | 2.70 | 1.85 | 2.20 | 2.01 | 2.30 | 0.35 | 2.38 | 1.98 |
| 2 | -1.25 | -2.54 | -2.42 | -1.95 | -0.23 | 0.47 | -2.27 | 1.83 | 1.78 | 0.59 | 1.42 | -0.58 | -2.72 | -1.53 | -1.21 | -1.94 | 0.72 | -0.77 | 1.10 | -0.41 | 1.60 | 2.47 | -1.59 | -0.66 | -1.19 | -1.14 | 0.42 | -0.02 | 0.81 | -0.77 | -0.34 | -1.02 | 1.51 | 2.59 | -1.96 | 2.41 | -0.75 | -1.07 | 2.14 | -0.87 | 1.88 | 0.77 | -0.15 | 0.20 | -0.38 | -0.31 | 1.39 | -0.18 | -0.31 | -1.02 | 0.03 | -1.04 | 1.73 | 2.00 | -0.91 | 1.42 | -0.68 | 1.24 | -0.80 | -0.68 | -0.35 | 1.45 | -1.09 | -0.56 | 0.67 | -1.16 | -1.56 | 0.26 | -0.57 | 1.48 | 1.57 | 1.28 | 1.52 | 0.70 | -0.87 | 2.07 | 1.37 | -0.79 | 0.50 | -2.01 | -1.58 | -1.29 | -2.08 | 0.11 | -0.54 | 0.06 | 0.73 | 0.95 | 1.52 | -0.31 | 1.35 | 1.06 | 0.99 | -1.31 | -0.04 | 2.37 | 1.31 | 1.70 | 1.80 | 1.32 | 1.28 | -1.63 | 0.13 | 0.83 | 1.30 | -1.22 | 1.27 | 1.57 | 1.42 | 2.69 | 2.35 | 1.82 | 1.87 | 2.27 | 1.06 | 1.44 | 1.41 | 1.28 | 1.89 | 1.25 | 1.56 | 2.09 | 0.88 | 1.36 | 2.53 | 1.18 | 2.53 | 2.97 | 1.80 | 2.57 | 2.24 | 2.10 | 1.46 | 1.68 | 1.55 | 2.42 | -1.18 | 3.29 | 1.71 | 1.26 | 4.50 | 1.16 | 2.43 | 1.35 | -1.30 | 2.72 | -0.57 | 1.53 | 3.94 | 0.52 | 1.91 | 1.65 | 1.11 | 1.67 | 1.75 | 2.96 | 1.51 | 2.20 | 4.12 | 0.46 | 1.62 | 2.66 | 3.34 | 2.45 | 1.48 | 2.33 | 1.15 | 1.13 | 1.60 | 2.02 | 1.69 | 2.25 | -1.91 | 2.61 | 1.99 | -0.33 | 2.59 | 2.12 | 1.51 | 1.78 | 2.26 | 2.40 | 0.61 | 0.55 | 0.54 | -0.30 | 1.74 | 1.93 | 2.71 | 1.37 | 1.98 | 2.82 | -0.95 | 2.81 | 1.29 | -1.19 | -1.23 | 1.86 | 1.29 | 0.24 | -0.46 | 0.44 | 1.06 | 0.44 | -0.67 | -2.54 | 0.78 | 1.29 | 1.08 | 1.18 | 0.95 | 1.21 | 0.98 | 1.33 | -1.14 | -1.40 | 1.03 | 1.00 | 0.26 | 0.75 | 1.61 | 1.90 | 1.77 | -0.93 | 0.55 | -0.17 | -0.07 | -0.09 | 1.61 | 1.19 | -1.33 | 1.15 | 1.93 | -0.86 | -0.16 | -1.05 | 0.40 | 1.46 | -0.90 | 1.65 | -0.41 | 1.16 | 2.01 | 1.76 | 0.75 | 0.21 | 0.95 | 1.57 | 0.60 | 1.20 | -1.57 | 1.57 | 1.17 | -0.98 | 0.83 | 1.51 | 0.68 | 0.97 | 1.01 | 1.38 | -0.92 | -0.21 | 1.66 | -1.02 | 2.31 | 2.81 | 0.72 | 2.22 | 1.65 | -0.63 | 0.39 | 1.64 | -0.18 | 2.57 | 1.35 | 0.52 | -0.49 | -0.19 | 1.67 | 2.73 | 2.95 | 0.89 | -0.07 | 1.82 | 2.34 | 2.94 | 0.06 | 2.17 | 0.47 | 2.17 | 1.16 | 1.91 | 0.82 | 1.50 | 1.10 | 2.21 | -0.27 | 1.53 | 0.76 | 1.63 | 2.15 | -0.31 | 1.80 | 1.43 | -0.09 | 1.58 | 1.57 | 1.55 | 0.52 | 1.93 | 2.45 | -1.75 | 1.67 | 1.56 | 0.85 | -0.97 | 0.87 | 2.88 | -0.64 | 2.46 | 0.49 | 2.15 | 1.38 | -0.47 | 1.67 | -0.74 | 0.86 | -1.21 | -0.25 | -2.52 | -1.79 | -0.23 | 0.24 | -1.84 | -0.19 | -1.73 | -2.21 | -1.69 | -0.79 | -2.04 | -0.08 | -0.50 | -0.71 | -2.28 | -0.06 | -0.03 | -0.04 | -1.53 | 1.59 | -1.08 | 0.60 | -1.04 | -1.08 | -1.22 | -3.43 | -1.18 | 2.15 | -1.03 | 1.91 | -0.56 | -1.75 | -0.91 | -1.98 | 1.22 | -2.17 | -2.47 | -1.47 | 0.59 | -1.78 | -1.13 | 0.92 | 1.04 | -1.04 | -1.27 | -1.57 | -2.40 | 0.03 | -1.28 | 0.49 | -1.16 | 1.23 | -1.21 | -0.24 | -2.65 | 0.47 | -0.55 | 1.27 | -1.91 | 1.25 | -2.25 | -0.92 | 1.75 | -0.07 | 1.31 | 0.43 | 1.53 | 1.09 | 0.88 | 0.58 | -0.26 | 1.19 | 1.19 | 0.85 | 2.08 | 0.23 | -0.29 | 0.91 | 2.61 | 0.98 | 1.29 | 1.51 | 2.84 | -0.08 | 0.49 | 1.23 | 1.94 | -1.44 | 1.57 | 0.81 | 1.44 | 0.61 | 1.64 | -0.24 | 2.96 | 1.21 | 2.71 | 2.05 | 1.25 | 1.53 | 2.45 | -0.63 | 2.72 | 2.68 | 1.97 | 4.42 | 2.55 | -0.60 | 2.33 | 1.15 | 3.46 | 1.96 | 0.95 | 2.37 | 4.12 | 3.28 | 1.16 | 1.51 | 3.41 | 2.32 | 3.55 | 2.68 | 1.52 | -1.54 | 2.29 | 3.02 | 2.71 | 1.66 | 2.09 | 1.57 | 2.18 | 2.34 | 0.92 | 2.79 | 2.32 | 2.34 | 2.91 | 2.22 | 3.10 | 2.10 | 4.37 | 2.65 | 2.76 | 3.00 | 0.85 | 0.27 | 2.48 | 1.98 | 2.25 | 1.84 | 2.35 | 0.30 | 2.46 | 2.27 |
| 3 | -1.19 | -2.28 | -2.21 | -1.92 | -0.31 | 0.40 | -2.71 | 1.81 | 1.69 | 0.50 | 1.58 | -1.01 | -2.44 | -1.32 | -1.18 | -1.89 | 0.76 | -0.97 | 1.20 | -0.69 | 1.64 | 2.65 | -1.85 | -0.45 | -1.12 | -1.32 | 0.07 | -0.23 | 0.79 | -0.74 | 0.07 | -1.05 | 1.51 | 2.79 | -2.18 | 2.14 | -0.59 | -1.04 | 2.02 | -0.66 | 1.78 | 0.77 | -0.15 | 0.36 | -0.35 | -0.26 | 1.40 | -0.65 | -0.19 | -1.05 | 0.15 | -1.08 | 1.91 | 1.84 | -1.22 | 1.24 | -0.19 | 1.24 | -0.81 | -0.67 | -0.33 | 1.21 | -1.36 | -0.53 | 0.57 | -0.42 | -1.60 | 0.55 | -0.16 | 1.58 | 1.49 | 1.18 | 1.40 | 0.96 | -1.31 | 1.81 | 1.31 | -1.24 | 0.50 | -2.01 | -1.87 | -1.62 | -2.15 | -0.05 | -0.59 | 0.11 | 0.60 | 1.08 | 1.59 | -0.70 | 1.34 | 0.74 | 0.78 | -1.31 | -0.28 | 1.47 | 1.14 | 1.73 | 1.55 | 1.15 | 1.27 | -1.75 | 0.03 | 0.41 | 1.00 | -1.23 | 1.02 | 1.53 | 1.24 | 2.56 | 2.21 | 1.69 | 1.61 | 2.03 | 0.84 | 1.38 | 1.37 | 1.12 | 1.82 | 1.19 | 1.39 | 1.77 | 0.61 | 1.22 | 2.38 | 1.02 | 2.42 | 2.94 | 1.94 | 2.40 | 1.85 | 2.12 | 1.52 | 1.60 | 1.55 | 2.18 | -1.38 | 3.53 | 1.80 | 1.27 | 4.88 | 1.29 | 2.32 | 1.31 | -1.19 | 2.88 | -0.72 | 1.53 | 4.56 | 0.54 | 1.80 | 1.75 | 0.83 | 1.68 | 1.79 | 2.82 | 1.62 | 2.14 | 3.77 | 0.41 | 1.67 | 2.82 | 3.38 | 2.63 | 0.98 | 2.16 | 1.15 | 1.14 | 1.60 | 2.01 | 1.63 | 2.37 | -1.91 | 2.62 | 2.10 | -0.55 | 2.56 | 2.12 | 1.68 | 1.87 | 2.46 | 2.48 | 0.70 | 0.88 | 0.90 | -0.67 | 1.74 | 2.13 | 2.63 | 1.25 | 2.08 | 3.01 | -0.81 | 2.40 | 1.26 | -1.19 | -1.24 | 1.42 | 1.39 | -0.46 | -0.46 | 0.12 | 0.77 | 0.39 | -0.69 | -2.62 | 0.91 | 1.21 | 0.75 | 1.18 | 0.84 | 1.26 | 0.89 | 1.33 | -1.27 | -1.46 | 1.04 | 0.50 | 0.38 | 0.63 | 1.42 | 1.80 | 1.49 | -1.08 | 0.55 | -0.41 | 0.11 | 0.12 | 1.54 | 1.19 | -1.40 | 1.21 | 1.61 | -1.08 | -0.35 | -1.05 | 0.31 | 1.57 | -0.90 | 1.37 | -0.41 | 1.32 | 1.77 | 1.81 | 0.75 | 0.14 | 0.97 | 1.34 | 0.59 | 1.01 | -1.33 | 0.28 | 1.17 | -1.01 | 0.74 | 1.27 | 0.68 | 0.97 | 0.75 | 1.06 | -0.91 | -0.21 | 1.51 | -1.04 | 2.22 | 2.94 | 0.72 | 2.33 | 1.66 | -0.50 | 0.58 | 1.46 | -0.05 | 2.39 | 1.24 | 0.36 | -0.46 | -0.22 | 1.78 | 2.73 | 2.50 | 0.78 | 0.18 | 1.82 | 1.94 | 2.85 | 0.06 | 2.26 | 0.97 | 2.15 | 1.16 | 1.91 | 0.98 | 1.50 | 0.88 | 1.77 | -0.70 | 1.64 | 0.86 | 1.68 | 2.03 | -0.14 | 1.84 | 1.47 | -0.15 | 1.96 | 1.57 | 1.42 | 0.65 | 1.90 | 2.26 | -1.71 | 1.67 | 1.46 | 0.59 | -1.02 | 0.90 | 2.88 | -0.47 | 2.49 | 0.78 | 2.15 | 0.89 | -0.47 | 1.67 | -0.46 | 0.88 | -1.12 | -0.24 | -2.56 | -2.46 | -0.22 | 0.20 | -1.76 | -0.12 | -1.73 | -2.23 | -1.71 | -0.99 | -2.22 | -0.49 | -0.64 | -0.52 | -2.22 | -0.13 | -0.14 | -0.08 | -1.53 | 1.63 | -0.89 | 0.60 | -1.01 | -1.09 | -1.20 | -2.96 | -1.24 | 1.96 | -0.60 | 1.47 | -0.56 | -2.03 | -1.01 | -1.89 | 1.16 | -2.27 | -2.25 | -1.40 | 0.64 | -1.88 | -1.07 | 0.91 | 0.99 | -0.97 | -1.27 | -1.33 | -2.40 | 0.07 | -1.36 | 0.49 | -1.19 | 1.26 | -1.31 | -0.25 | -2.72 | 0.45 | -0.63 | 1.86 | -1.79 | 1.25 | -2.11 | -0.92 | 1.72 | -1.28 | 1.32 | 0.41 | 1.43 | 0.91 | 0.84 | 0.60 | -0.35 | 1.04 | 1.11 | 0.69 | 2.11 | 0.00 | 0.01 | 0.87 | 2.59 | 1.04 | 1.24 | 1.43 | 2.36 | -0.09 | 0.26 | 1.21 | 1.58 | -1.17 | 1.59 | 0.71 | 1.50 | 0.61 | 1.44 | -0.01 | 2.96 | 1.54 | 2.44 | 2.23 | 1.25 | 1.60 | 2.74 | -0.16 | 2.80 | 3.01 | 2.28 | 4.15 | 1.94 | -0.78 | 2.31 | 1.31 | 3.05 | 1.92 | 0.96 | 1.97 | 3.83 | 2.98 | 0.99 | 1.46 | 3.43 | 2.32 | 3.48 | 2.78 | 1.51 | -1.58 | 2.17 | 2.60 | 2.53 | 1.82 | 2.16 | 1.40 | 2.28 | 2.43 | 0.88 | 2.86 | 2.42 | 2.25 | 2.70 | 2.41 | 2.96 | 2.00 | 4.20 | 2.57 | 2.72 | 2.98 | 0.75 | 0.24 | 2.40 | 1.96 | 2.20 | 1.91 | 2.19 | 0.23 | 2.50 | 2.25 |
| 4 | -1.23 | -2.22 | -2.20 | -2.10 | -0.41 | 0.32 | -2.75 | 1.75 | 1.47 | 0.45 | 1.58 | -0.82 | -2.37 | -1.13 | -1.21 | -1.99 | 0.72 | -1.22 | 1.03 | -0.72 | 1.58 | 2.59 | -2.12 | -0.95 | -1.19 | -1.43 | -0.06 | -0.43 | 0.79 | -0.72 | -0.09 | -0.91 | 1.39 | 2.63 | -2.13 | 2.29 | -0.82 | -1.08 | 1.84 | -0.75 | 1.64 | 0.60 | -0.12 | 0.34 | -0.96 | -0.83 | 1.40 | -0.70 | -0.37 | -1.12 | 0.24 | -0.96 | 1.55 | 1.84 | -1.19 | 1.27 | -0.47 | 1.24 | -1.30 | -0.50 | -0.53 | 1.12 | -1.42 | -0.56 | 0.69 | -0.87 | -1.64 | 0.05 | -0.70 | 1.51 | 1.51 | 1.10 | 1.40 | 0.58 | -1.19 | 1.63 | 1.31 | -1.17 | 0.15 | -1.79 | -1.87 | -1.58 | -2.18 | -0.14 | -0.54 | -0.16 | 0.26 | 1.08 | 1.50 | -0.74 | 1.40 | 0.79 | 0.61 | -1.58 | -0.60 | 1.23 | 1.12 | 1.60 | 1.38 | 1.14 | 1.23 | -1.74 | -0.11 | 0.41 | 1.12 | -1.15 | 1.04 | 1.53 | 1.10 | 2.42 | 2.14 | 1.61 | 1.51 | 1.92 | 0.71 | 1.51 | 1.26 | 1.03 | 1.66 | 0.89 | 1.32 | 1.79 | 0.56 | 1.22 | 2.20 | 0.89 | 2.30 | 2.92 | 1.94 | 2.48 | 1.87 | 2.12 | 1.52 | 1.60 | 1.55 | 2.08 | -1.38 | 3.14 | 1.75 | 1.03 | 4.55 | 1.20 | 2.45 | 1.35 | -1.26 | 2.54 | -0.85 | 1.53 | 4.18 | 0.62 | 1.80 | 1.70 | 0.68 | 1.82 | 1.78 | 2.77 | 1.55 | 1.99 | 3.49 | 0.26 | 1.65 | 2.60 | 3.12 | 2.52 | 0.98 | 2.16 | 1.15 | 1.07 | 1.64 | 2.00 | 1.56 | 2.20 | -2.40 | 2.45 | 1.68 | -0.57 | 2.21 | 2.12 | 1.45 | 1.87 | 2.46 | 2.17 | 0.80 | 0.72 | 0.87 | -0.26 | 1.74 | 1.88 | 2.39 | 1.34 | 1.78 | 2.69 | -0.92 | 2.53 | 1.28 | -1.13 | -1.24 | 1.64 | 1.33 | -0.10 | -0.46 | 0.36 | 0.52 | 0.48 | -0.67 | -2.34 | 1.01 | 1.22 | 0.75 | 1.02 | 0.75 | 1.13 | 0.90 | 1.49 | -1.30 | -0.99 | 0.83 | 0.50 | 0.50 | 0.63 | 1.42 | 1.85 | 1.49 | -1.29 | 0.41 | -0.29 | 0.08 | -0.32 | 1.54 | 1.10 | -1.37 | 1.30 | 1.56 | -1.20 | -0.74 | -1.05 | 0.51 | 1.27 | -0.90 | 1.37 | -0.41 | 1.19 | 1.73 | 1.67 | 0.75 | 0.04 | 0.91 | 1.35 | 0.59 | 0.95 | -0.86 | 0.28 | 0.89 | -1.23 | 0.74 | 1.27 | 0.67 | 1.10 | 0.75 | 1.13 | -0.92 | -0.20 | 1.48 | -1.02 | 2.25 | 2.81 | 0.67 | 1.80 | 1.69 | -0.50 | 0.52 | 1.37 | -0.13 | 2.40 | 1.27 | 0.60 | -0.19 | -0.22 | 1.65 | 2.79 | 1.89 | 0.85 | 0.20 | 1.82 | 1.85 | 2.45 | 0.19 | 2.26 | 0.56 | 1.74 | 1.26 | 1.91 | 0.87 | 1.51 | 0.60 | 1.88 | -0.74 | 1.67 | 0.68 | 1.68 | 1.96 | 0.09 | 1.77 | 1.11 | -0.15 | 1.96 | 1.57 | 1.28 | 0.89 | 1.72 | 2.24 | -1.72 | 1.55 | 1.47 | 0.73 | -1.02 | 0.97 | 2.85 | -0.49 | 2.51 | 0.78 | 1.76 | 1.11 | -0.47 | 1.52 | -0.69 | 0.78 | -1.14 | -0.13 | -2.28 | -1.61 | -0.31 | 0.20 | -1.67 | -0.00 | -1.63 | -2.21 | -1.73 | -0.92 | -2.09 | -0.56 | -0.81 | -0.54 | -2.51 | -0.54 | -0.28 | -0.08 | -1.53 | 1.10 | -0.60 | 0.60 | -0.99 | -1.15 | -1.22 | -2.75 | -1.28 | 2.02 | -0.63 | 1.47 | -0.51 | -2.03 | -0.88 | -1.87 | 1.21 | -2.11 | -2.20 | -1.32 | 0.64 | -1.86 | -1.01 | 1.03 | 1.06 | -1.31 | -1.32 | -1.23 | -2.88 | 0.04 | -0.93 | 0.49 | -1.24 | 1.43 | -1.29 | -0.14 | -2.87 | 0.44 | -0.46 | 1.86 | -1.65 | 1.20 | -1.98 | -0.94 | 1.77 | -1.28 | 1.26 | 0.42 | 1.28 | 1.27 | 0.85 | 0.55 | -0.48 | 1.11 | 1.08 | 0.60 | 2.04 | -0.01 | -0.13 | 0.87 | 2.40 | 0.82 | 1.44 | 1.30 | 2.36 | 0.09 | 0.33 | 1.30 | 1.58 | -1.19 | 1.59 | 0.15 | 1.50 | 0.95 | 1.44 | -0.83 | 2.68 | 1.32 | 2.44 | 1.98 | 1.30 | 1.25 | 2.27 | -0.02 | 2.80 | 3.01 | 2.01 | 3.88 | 1.94 | -1.06 | 2.21 | 1.14 | 2.45 | 1.86 | 0.73 | 1.97 | 3.48 | 2.57 | 0.99 | 1.46 | 3.43 | 2.32 | 3.26 | 2.63 | 1.39 | -1.43 | 1.99 | 2.60 | 2.38 | 1.82 | 1.92 | 1.32 | 2.19 | 2.37 | 0.78 | 2.59 | 2.32 | 2.33 | 2.63 | 2.36 | 2.93 | 1.96 | 4.03 | 2.47 | 2.78 | 2.54 | 0.74 | 0.43 | 2.40 | 1.85 | 2.06 | 1.75 | 2.08 | 0.29 | 2.43 | 2.30 |
Out[3]:
(2340, 482)
In [4]:
from scipy.stats import pearsonr, spearmanr, kendalltau
from xicor.xicor import Xi
plt.rcParams['font.sans-serif']=['SimHei']
plt.rcParams['axes.unicode_minus'] = False
from scipy.stats import pearsonr, spearmanr, kendalltau
from xicor.xicor import Xi
In [ ]:
from numpy import array, random, arange
def xicor(X, Y, ties=True):
# 设置随机种子(可选)
# random.seed(42)
# 获取数据长度
n = len(X)
# 根据X的值对索引进行排序
order = array([i[0] for i in sorted(enumerate(X), key=lambda x: x[1])])
if ties:
# 计算每个元素在Y中的排名
l = array([sum(y >= Y[order]) for y in Y[order]])
# 初始化r为l的副本
r = l.copy()
# 处理并列的情况
for j in range(n):
tie_count = sum(r[j] == r[i] for i in range(n))
if tie_count > 1:
tie_index = array([r[j] == r[i] for i in range(n)])
r[tie_index] = random.choice(r[tie_index] - arange(0, tie_count), tie_count, replace=False)
# 计算XICOR相关系数
return 1 - n * sum(abs(r[1:] - r[:n-1])) / (2 * sum(l * (n - l)))
else:
# 计算每个元素在Y中的排名
r = array([sum(y >= Y[order]) for y in Y[order]])
# 计算XICOR相关系数(不处理并列情况)
return 1 - 3 * sum(abs(r[1:] - r[:n-1])) / (n**2 - 1)
# 示例用法
X = [1, 2, 3, 4, 5]
Y = [5, 4, 3, 2, 1]
print(xicor(X, Y, ties=True))
- 模拟实验1 不同分布随机变量的度量情况
In [5]:
def comparison_xicor_difference_pdf(calculation ,title_name='各种分布假定下的xicor系数对sin函数的衡量',gap=5,n=50):
beta=[];binomial=[];chisquare=[];exponential=[];f=[];gamma=[]
geometric=[];gumbel=[];laplace=[];logistic =[];lognormal =[];
negative_binomial=[];noncentral_chisquare=[];noncentral_f=[];
norm=[];pareto=[];t=[];uniform=[];weibull=[];
for i in [gap*a for a in range(n)]:
x_beta=np.random.beta(a=2,b=2,size=i+2)
x_binomial=np.random.binomial(n=10,p=0.5,size=i+2)
x_chisquare=np.random.chisquare(df=5,size=i+2)
x_exponential=np.random.exponential(scale=1,size=i+2)
x_f=np.random.f(dfnum=2, dfden=2,size=i+2)
x_gamma=np.random.gamma(shape=5,scale=1,size=i+2)
x_geometric=np.random.geometric(p=0.5,size=i+2)
x_gumbel=np.random.gumbel(loc=0,scale=1,size=i+2)
x_laplace=np.random.laplace(loc=0,scale=1,size=i+2)
x_logistic=np.random.logistic(loc=0,scale=1,size=i+2)
x_lognormal=np.random.lognormal(mean=0,sigma=1,size=i+2)
x_negative_binomial=np.random.negative_binomial(n=5,p=0.5,size=i+2)
x_noncentral_chisquare=np.random.noncentral_chisquare(df=5,nonc=5,size=i+2)
x_noncentral_f=np.random.noncentral_f(dfnum=5,dfden=5,nonc=5,size=i+2)
x_norm=np.random.normal(loc=0,scale=1,size=i+2)
x_pareto=np.random.pareto(a=5,size=i+2)
x_t=np.random.standard_t(df=5,size=i+2)
x_uniform=np.random.uniform(low=0,high=1,size=i+2)
x_weibull=np.random.weibull(a=5,size=i+2)
y_beta=calculation(x_beta)
y_binomial=calculation(x_binomial)
y_chisquare=calculation(x_chisquare)
y_exponential=calculation(x_exponential)
y_f=calculation(x_f)
y_gamma=calculation(x_gamma)
y_geometric=calculation(x_geometric)
y_gumbel=calculation(x_gumbel)
y_laplace=calculation(x_laplace)
y_logistic=calculation(x_logistic)
y_lognormal=calculation(x_lognormal)
y_negative_binomial=calculation(x_negative_binomial)
y_noncentral_chisquare=calculation(x_noncentral_chisquare)
y_noncentral_f=calculation(x_noncentral_f)
y_norm=calculation(x_norm)
y_pareto=calculation(x_pareto)
y_t=calculation(x_t)
y_uniform=calculation(x_uniform)
y_weibull=calculation(x_weibull)
beta.append(max(Xi(list(x_beta),list(y_beta)).correlation,Xi(list(y_beta),list(x_beta)).correlation))
binomial.append(max(Xi(list(x_binomial),list(y_binomial)).correlation,Xi(list(y_binomial),list(x_binomial)).correlation))
chisquare.append(max(Xi(list(x_chisquare),list(y_chisquare)).correlation,Xi(list(y_chisquare),list(x_chisquare)).correlation))
exponential.append(max(Xi(list(x_exponential),list(y_exponential)).correlation,Xi(list(y_exponential),list(x_exponential)).correlation))
f.append(max(Xi(list(x_f),list(y_f)).correlation,Xi(list(y_f),list(x_f)).correlation))
gamma.append(max(Xi(list(x_gamma),list(y_gamma)).correlation,Xi(list(y_gamma),list(x_gamma)).correlation))
geometric.append(max(Xi(list(x_geometric),list(y_geometric)).correlation,Xi(list(y_geometric),list(x_geometric)).correlation))
gumbel.append(max(Xi(list(x_gumbel),list(y_gumbel)).correlation,Xi(list(y_gumbel),list(x_gumbel)).correlation))
laplace.append(max(Xi(list(x_laplace),list(y_laplace)).correlation,Xi(list(y_laplace),list(x_laplace)).correlation))
logistic.append(max(Xi(list(x_logistic),list(y_logistic)).correlation,Xi(list(y_logistic),list(x_logistic)).correlation))
lognormal.append(max(Xi(list(x_lognormal),list(y_lognormal)).correlation,Xi(list(y_lognormal),list(x_lognormal)).correlation))
negative_binomial.append(max(Xi(list(x_negative_binomial),list(y_negative_binomial)).correlation,Xi(list(y_negative_binomial),list(x_negative_binomial)).correlation))
noncentral_chisquare.append(max(Xi(list(x_noncentral_chisquare),list(y_noncentral_chisquare)).correlation,Xi(list(y_noncentral_chisquare),list(x_noncentral_chisquare)).correlation))
noncentral_f.append(max(Xi(list(x_noncentral_f),list(y_noncentral_f)).correlation,Xi(list(y_noncentral_f),list(x_noncentral_f)).correlation))
norm.append(max(Xi(list(x_norm),list(y_norm)).correlation,Xi(list(y_norm),list(x_norm)).correlation))
pareto.append(max(Xi(list(x_pareto),list(y_pareto)).correlation,Xi(list(y_pareto),list(x_pareto)).correlation))
t.append(max(Xi(list(x_t),list(y_t)).correlation,Xi(list(y_t),list(x_t)).correlation))
uniform.append(max(Xi(list(x_uniform),list(y_uniform)).correlation,Xi(list(y_uniform),list(x_uniform)).correlation))
weibull.append(max(Xi(list(x_weibull),list(y_weibull)).correlation,Xi(list(y_weibull),list(x_weibull)).correlation))
plt.figure()
plt.plot(range(n), beta, marker=None, label='beta')
plt.plot(range(n), binomial, marker=None, label='binomial')
plt.plot(range(n), chisquare, marker=None, label='chisquare')
plt.plot(range(n), exponential, marker=None, label='exponential')
plt.plot(range(n), f, marker=None, label='f')
plt.plot(range(n), gamma, marker=None, label='gamma')
plt.plot(range(n), geometric, marker=None, label='geometric')
plt.plot(range(n), gumbel, marker=None, label='gumbel')
plt.plot(range(n), laplace, marker=None, label='laplace')
plt.plot(range(n), logistic, marker=None, label='logistic')
plt.plot(range(n), lognormal, marker=None, label='lognormal')
plt.plot(range(n), negative_binomial, marker=None, label='negative_binomial')
plt.plot(range(n), noncentral_chisquare, marker=None, label='noncentral_chisquare')
plt.plot(range(n), noncentral_f, marker=None, label='noncentral_f')
plt.plot(range(n), norm, marker=None, label='norm')
plt.plot(range(n), pareto, marker=None, label='pareto')
plt.plot(range(n), t, marker=None, label='t')
plt.plot(range(n), uniform, marker=None, label='uniform')
plt.plot(range(n), weibull, marker=None, label='weibull')
plt.title(title_name);plt.xlabel('样本数/{}'.format(gap));plt.ylabel('rho_Value');plt.grid(False)
# 显示图例,网格(可选)
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.show()
In [6]:
comparison_xicor_difference_pdf(lambda x: np.sin(x),n=50)
c:\anaconda\envs\pytorch\lib\site-packages\xicor\xicor.py:81: RuntimeWarning: invalid value encountered in scalar divide return 1 - self.mean_absolute / self.inverse_g_mean
In [6]:
# #R语言的RDC 函数
# rdc <- function(x,y,k,s) {
# x <- cbind(apply(as.matrix(x),2,function(u) ecdf(u)(u)),1)
# y <- cbind(apply(as.matrix(y),2,function(u) ecdf(u)(u)),1)
# wx <- matrix(rnorm(ncol(x)*k,0,s),ncol(x),k)
# wy <- matrix(rnorm(ncol(y)*k,0,s),ncol(y),k)
# cancor(cbind(cos(x%*%wx),sin(x%*%wx)), cbind(cos(y%*%wy),sin(y%*%wy)))$cor[1]
# }
# x <- matrix(c(1, 2, 3, 4), nrow = 2, byrow = TRUE)
# y <- matrix(c(5, 6, 7, 8), nrow = 2, byrow = TRUE)
# result <- rdc(x, y, k = 5, s = 1)
# cat(sprintf("%.7f\n", result))
#随机相依系数函数实现
from scipy.stats import rankdata
from scipy.linalg import svd
def rdc(x, y, k, s):
# Convert to numpy arrays if they are not already
x = np.array(x)
y = np.array(y)
# Apply ECDF to each column of x and y
x_ecdf = np.apply_along_axis(lambda u: rankdata(u, method='max') / len(u), axis=0, arr=x)
y_ecdf = np.apply_along_axis(lambda u: rankdata(u, method='max') / len(u), axis=0, arr=y)
# Add a column of ones
x = np.c_[x_ecdf, np.ones(x.shape[0])]
y = np.c_[y_ecdf, np.ones(y.shape[0])]
# Generate random matrices wx and wy
wx = np.random.normal(0, s, (x.shape[1], k))
wy = np.random.normal(0, s, (y.shape[1], k))
# Compute the projections
x_proj = np.cos(np.dot(x, wx)) + 1j * np.sin(np.dot(x, wx))
y_proj = np.cos(np.dot(y, wy)) + 1j * np.sin(np.dot(y, wy))
# Compute the canonical correlation
Ux, Sx, Vx = svd(x_proj, full_matrices=False)
Uy, Sy, Vy = svd(y_proj, full_matrices=False)
# Compute the correlation matrix
cor_matrix = np.dot(Ux.T, Uy)
# Return the first canonical correlation coefficient
return np.abs(cor_matrix).max()
# Example usage:
x = [[1, 2], [3, 4]]
y = [[5, 6], [7, 8]]
print(rdc(x, y, k=5, s=0))
1.0
- 模拟实验2
In [7]:
#衡量交互效应
def interaction_effect_three(pdf_type='difference',choose='x1',gap=5,min_=0,max_=100,n=50):
x1_112=[];x1_113=[];x1_122=[];x1_133=[];x1_123=[];
x2_112=[];x2_122=[];x2_123=[];x2_223=[];x2_233=[];
x3_113=[];x3_123=[];x3_133=[];x3_223=[];x3_233=[];
for i in [gap*a for a in range(n)]:
if pdf_type=='difference':
x1=np.linspace(min_,max_,i+2) #生成i+2个数据点
x2=np.random.randint(min_,max_,i+2)
x3=np.random.binomial(int(max_),0.5,i+2)
elif pdf_type=='Bernoulli':
x1=np.random.binomial(1,0.5,i+2) #生成i+2个数据点
x2=np.random.binomial(1,0.5,i+2)
x3=np.random.binomial(1,0.5,i+2)
elif pdf_type=='binomial':
x1=np.random.binomial(int(max_),0.5,i+2)
x2=np.random.binomial(int(max_),0.5,i+2)
x3=np.random.binomial(int(max_),0.5,i+2)
x1x1x2=x1*x1*x2;x1x1x3=x1*x1*x3;x1x2x2=x1*x2*x2;x1x3x3=x1*x3*x3;
x1x2x3=x1*x2*x3;x2x2x3=x2*x2*x3;x2x3x3=x2*x3*x3
if choose=='x1':
x1_112.append(max(Xi(list(x1),list(x1x1x2)).correlation,Xi(list(x1x1x2),list(x1)).correlation))
x1_113.append(max(Xi(list(x1),list(x1x1x3)).correlation,Xi(list(x1x1x3),list(x1)).correlation))
x1_122.append(max(Xi(list(x1),list(x1x2x2)).correlation,Xi(list(x1x2x2),list(x1)).correlation))
x1_133.append(max(Xi(list(x1),list(x1x3x3)).correlation,Xi(list(x1x3x3),list(x1)).correlation))
x1_123.append(max(Xi(list(x1),list(x1x2x3)).correlation,Xi(list(x1x2x3),list(x1)).correlation))
if choose=='x2':
x2_112.append(max(Xi(list(x2),list(x1x1x2)).correlation,Xi(list(x1x1x2),list(x2)).correlation))
x2_122.append(max(Xi(list(x2),list(x1x2x2)).correlation,Xi(list(x1x2x2),list(x2)).correlation))
x2_123.append(max(Xi(list(x2),list(x1x2x3)).correlation,Xi(list(x1x2x3),list(x2)).correlation))
x2_223.append(max(Xi(list(x2),list(x2x2x3)).correlation,Xi(list(x2x2x3),list(x2)).correlation))
x2_233.append(max(Xi(list(x2),list(x2x3x3)).correlation,Xi(list(x2x3x3),list(x2)).correlation))
if choose=='x3':
x3_113.append(max(Xi(list(x3),list(x1x1x3)).correlation,Xi(list(x1x1x3),list(x3)).correlation))
x3_123.append(max(Xi(list(x3),list(x1x2x3)).correlation,Xi(list(x1x2x3),list(x3)).correlation))
x3_133.append(max(Xi(list(x3),list(x1x3x3)).correlation,Xi(list(x1x3x3),list(x3)).correlation))
x3_223.append(max(Xi(list(x3),list(x2x2x3)).correlation,Xi(list(x2x2x3),list(x3)).correlation))
x3_233.append(max(Xi(list(x3),list(x2x3x3)).correlation,Xi(list(x2x3x3),list(x3)).correlation))
if choose=='x1':
y1=x1_112;y2=x1_113;y3=x1_122;y4=x1_133;y5=x1_123
label1=r'$\xi \left( x_1,x_1x_1x_2 \right) $'
label2=r'$\xi \left( x_1,x_1x_1x_3 \right) $'
label3=r'$\xi \left( x_1,x_1x_2x_2 \right) $'
label4=r'$\xi \left( x_1,x_1x_3x_3 \right) $'
label5=r'$\xi \left( x_1,x_1x_2x_3 \right) $'
if choose=='x2':
y1=x2_112;y2=x2_122;y3=x2_123;y4=x2_223;y5=x2_233
label1=r'$\xi \left( x_2,x_1x_1x_2 \right) $'
label2=r'$\xi \left( x_2,x_1x_2x_2 \right) $'
label3=r'$\xi \left( x_2,x_1x_2x_3 \right) $'
label4=r'$\xi \left( x_2,x_2x_2x_3 \right) $'
label5=r'$\xi \left( x_2,x_2x_3x_3 \right) $'
if choose=='x3':
y1=x3_113;y2=x3_123;y3=x3_133;y4=x3_223;y5=x3_233
label1=r'$\xi \left( x_3,x_1x_1x_3 \right) $'
label2=r'$\xi \left( x_3,x_1x_2x_3 \right) $'
label3=r'$\xi \left( x_3,x_1x_3x_3 \right) $'
label4=r'$\xi \left( x_3,x_2x_2x_3 \right) $'
label5=r'$\xi \left( x_3,x_2x_3x_3 \right) $'
plt.figure()
plt.plot(range(n), y1, marker=None, label=label1)
plt.plot(range(n), y2, marker=None, label=label2)
plt.plot(range(n), y3, marker=None, label=label3)
plt.plot(range(n), y4, marker=None, label=label4)
plt.plot(range(n), y5, marker=None, label=label5)
plt.title('{}的交互作用图'.format(choose));plt.xlabel('样本数/{}'.format(gap));plt.ylabel('rho_Value');plt.grid(False)
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.show()
In [8]:
interaction_effect_three(pdf_type='difference',choose='x1',gap=5,min_=0,max_=100,n=500)
In [9]:
interaction_effect_three(pdf_type='difference',choose='x2',gap=5,min_=0,max_=100,n=500)
In [10]:
interaction_effect_three(pdf_type='difference',choose='x3',gap=5,min_=0,max_=100,n=500)
In [11]:
interaction_effect_three(pdf_type='Bernoulli',choose='x1',gap=5,min_=0,max_=100,n=500)
c:\anaconda\envs\pytorch\lib\site-packages\xicor\xicor.py:81: RuntimeWarning: invalid value encountered in scalar divide return 1 - self.mean_absolute / self.inverse_g_mean
In [12]:
interaction_effect_three(pdf_type='Bernoulli',choose='x2',gap=5,min_=0,max_=100,n=500)
In [13]:
interaction_effect_three(pdf_type='Bernoulli',choose='x3',gap=5,min_=0,max_=100,n=500)
In [14]:
interaction_effect_three(pdf_type='binomial',choose='x1',gap=5,min_=0,max_=100,n=500)
In [15]:
interaction_effect_three(pdf_type='binomial',choose='x2',gap=5,min_=0,max_=100,n=500)
In [16]:
interaction_effect_three(pdf_type='binomial',choose='x3',gap=5,min_=0,max_=100,n=500)
模拟实验3¶
In [17]:
#模拟实验3
def comparison_single_varible(calculation,title_name=None,gap=5,min_=0,max_=100,n=50):
pearson1=[];spearman1=[];kendall1=[];Xi1=[];rdc1=[];
for i in [gap*a for a in range(n)]:
# x1=np.random.normal(0,1,i+2)
x1=np.linspace(min_,max_,i+2) #生成i+2个数据点
y1=calculation(x1)
pearson,pearson_p=pearsonr(x1, y1)
spearman,spearman_p=spearmanr(x1, y1)
kendall,kendall_p=kendalltau(x1, y1)
# rdcr=rdc(x1, y1,5,1)
Xir=max(Xi(list(x1),list(y1)).correlation,Xi(list(y1),list(x1)).correlation)
# Xir_p=Xi(list(x1),list(y1)).pval_asymptotic(ties=True)
pearson1.append(pearson)
spearman1.append(spearman)
kendall1.append(kendall)
# rdc1.append(rdcr)
Xi1.append(Xir)
plt.figure()
plt.plot(range(n), pearson1, marker=None, label='pearson')# 绘制pearson的折线图,marker='o'or's'
plt.plot(range(n), spearman1, marker=None, label='spearman')# 绘制spearman数据集的折线图
plt.plot(range(n), kendall1, marker=None, label='kendall')# 绘制kendall数据集的折线图
plt.plot(range(n), Xi1, marker=None, label='Xi') # 绘制xicor数据集的折线图
# plt.plot(range(n), rdc1, marker=None, label='rdc') # 绘制rdc数据集的折线图
plt.title(title_name);plt.xlabel('样本数/{}'.format(gap));plt.ylabel('rho_Value');plt.legend();plt.grid(False)
# 显示图例,网格(可选)
plt.show()
comparison_single_varible(lambda x:np.cumsum(x),title_name=r'$y\propto \sum{x}$') #模拟一个自变量为x的函数,y=x1+x2+x3+...+xn
In [18]:
comparison_single_varible(lambda x:pow(x,2)+pow(x,3),title_name=r'$y\propto x^2+x^3$') # y=x^2+x^3
In [19]:
comparison_single_varible(lambda x:np.exp(x),title_name=r'$y\propto exp(x)$') #y=exp(x)
In [20]:
comparison_single_varible(lambda x:np.sin(x),title_name=r'$y\propto sinx$',n=50) #y=sinx 样本量=250
In [21]:
# comparison_single_varible(lambda x:np.sin(x),title_name=r'$y\propto sinx$',n=500) #y=sinx 样本量=2500
In [22]:
comparison_single_varible(lambda x:np.sin(x)/x,title_name=r'$y\propto \frac{\sin x}{x}$',min_=0.1,n=50) #y=sinx/x 样本量=250
In [23]:
# comparison_single_varible(lambda x:x/np.sin(x),title_name=r'$y\propto \frac{x}{\sin x}$',min_=0.1,max_=100,n=50) #y=x/sinx 样本量=250
In [24]:
comparison_single_varible(lambda x:x/np.sin(x),title_name=r'$y\propto \frac{x}{\sin x}$',min_=0.1,max_=100,n=500) #y=x/sinx 样本量=2500
In [25]:
comparison_single_varible(lambda x:x/np.sin(x),title_name=r'$y\propto \frac{x}{\sin x}$',min_=0.1,max_=10000,n=50) #y=x/sinx 样本量=2500
In [26]:
comparison_single_varible(lambda x:x/np.sin(x),title_name=r'$y\propto \frac{x}{\sin x}$',min_=0.1,max_=200,n=500) #y=x/sinx 样本量=2500
In [27]:
comparison_single_varible(lambda x:pow(x,x),title_name=r'$y\propto x^x$',min_=0.1,n=50) #y=x^x 样本量=250
In [28]:
comparison_single_varible(lambda x:(pow(x,2.5)+x*np.sin(x**2)+np.log(x+1)+np.exp(x)/x)/10,min_=0.0001,max_=7,title_name=r'$y\propto x^{2.5}+xsin(x^2)+\ln \left( x+1 \right) -\frac{exp(x)}{x}$',n=50) #y=复杂函数 样本量=250
In [29]:
comparison_single_varible(lambda x:np.random.randint(x),title_name='无关系时',min_=0.1,n=50) #无关样本,样本量=250
In [30]:
comparison_single_varible(lambda x:np.random.randint(x),title_name='无关系时',min_=0.1,n=500) #无关样本,样本量=2500
In [31]:
#离散随机变量的分段函数情况
def dicrete_cut(x):
y=[]
for i in range(len(x)):
if x[i]<5: y.append(-1)
elif x[i]<10: y.append(0)
else: y.append(1)
return y
#连续随机变量的分段函数情况
def continue_cut(x):
y=[]
for i in range(len(x)):
if x[i]<5:y.append(-1)
elif x[i]<10: y.append(np.exp(-x[i])+10)
elif x[i]<15: y.append(np.sin(x[i]))
else: y.append(3)
return y
In [32]:
comparison_single_varible(lambda x : dicrete_cut(x),title_name=r'$y\propto -1\left( x<5 \right) \cup 0\left( 5\leqslant x<10 \right) \cup 1\left( 10\leqslant x \right) $',min_=0.1,max_=20,n=50) #离散分段函数,样本量=2500
In [33]:
comparison_single_varible(lambda x : continue_cut(x),title_name='连续的分段函数',min_=0.1,max_=20,n=50) #'连续的分段函数',样本量=250
In [34]:
comparison_single_varible(lambda x :np.sin(pow(x,2)+np.sin(x)),title_name='',min_=0,max_=10,n=50) #'连续的分段函数',样本量=250
降维分析¶
主成分分析 ¶
In [191]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
# 1.导入数据
X_scaled = x_data.to_numpy()
# 2. PCA降维
pca = PCA(n_components=6)
X_pca = pca.fit_transform(X_scaled)
# 主成分方向
components = pca.components_
# 3. 绘图
fig, ax = plt.subplots(1, 2, figsize=(12, 6), dpi=120)
# 投影后数据散点图
ax[0].scatter(X_pca[:, 0], X_pca[:, 1], c='magenta', alpha=0.7, edgecolors='k')
ax[0].set_title("主成分后的数据")
ax[0].set_xlabel("Principal Component 1")
ax[0].set_ylabel("Principal Component 2")
# 主成分方向的可视化
for i, (comp, var) in enumerate(zip(components, pca.explained_variance_)):
ax[1].arrow(0, 0, comp[0] * var, comp[1] * var,
color=f'C{i}', width=0.02, head_width=0.1)#, label=f'PC{i+1}'
ax[1].scatter(X_scaled[:, 0], X_scaled[:, 1], c='lightgreen', alpha=0.7, edgecolors='k')
ax[1].legend()
ax[1].set_title("在原本空间上的主成分")
ax[1].set_xlabel("Feature 1")
ax[1].set_ylabel("Feature 2")
plt.tight_layout()
plt.show()
Out[191]:
<matplotlib.collections.PathCollection at 0x1958fe7c790>
Out[191]:
Text(0.5, 1.0, '主成分后的数据')
Out[191]:
Text(0.5, 0, 'Principal Component 1')
Out[191]:
Text(0, 0.5, 'Principal Component 2')
Out[191]:
<matplotlib.patches.FancyArrow at 0x1959008ec10>
Out[191]:
<matplotlib.patches.FancyArrow at 0x1959008eeb0>
Out[191]:
<matplotlib.patches.FancyArrow at 0x19590045df0>
Out[191]:
<matplotlib.patches.FancyArrow at 0x195900832b0>
Out[191]:
<matplotlib.patches.FancyArrow at 0x19590083520>
Out[191]:
<matplotlib.patches.FancyArrow at 0x195900834f0>
Out[191]:
<matplotlib.collections.PathCollection at 0x1959008ecd0>
C:\Users\cqm\AppData\Local\Temp\ipykernel_30380\4172377372.py:30: UserWarning: No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. ax[1].legend()
Out[191]:
<matplotlib.legend.Legend at 0x1959008eb80>
Out[191]:
Text(0.5, 1.0, '在原本空间上的主成分')
Out[191]:
Text(0.5, 0, 'Feature 1')
Out[191]:
Text(0, 0.5, 'Feature 2')
因子分析 ¶
In [192]:
from sklearn.decomposition import FactorAnalysis
import seaborn as sns
#1. 数据准备
data = x_data
columns = x_data.columns
# 2. 因子分析
fa = FactorAnalysis(n_components=6, random_state=42)
fa.fit(data)
# 提取因子载荷矩阵和因子得分
factor_loadings = fa.components_.T
factor_scores = fa.transform(data)
# 3. 可视化
plt.figure(figsize=(12, 6))
# 因子载荷热力图
plt.subplot(1, 2, 1)
sort_df = pd.DataFrame(factor_loadings.T,columns=columns ).T
sort_df=sort_df.sort_values(by=[0,1], ascending=[False,False]).T
sort_df_numpy=sort_df.to_numpy().T
sns.heatmap(sort_df_numpy[:30,:], annot=True, cmap="YlGnBu", xticklabels=["Factor1", "Factor2"], yticklabels=sort_df.columns[:30])
plt.title("因子载荷热力图")
plt.xlabel("Factors")
plt.ylabel("Variables")
# 因子得分散点图
plt.subplot(1, 2, 2)
plt.scatter(factor_scores[:, 0], factor_scores[:, 1], c="tomato", alpha=0.7, edgecolor="k")
plt.axhline(0, color="black", linestyle="--", linewidth=0.8)
plt.axvline(0, color="black", linestyle="--", linewidth=0.8)
plt.title("因子得分散点图")
plt.xlabel("Factor 1")
plt.ylabel("Factor 2")
plt.tight_layout()
plt.show()
Out[192]:
FactorAnalysis(n_components=6, random_state=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
FactorAnalysis(n_components=6, random_state=42)
Out[192]:
<Figure size 1200x600 with 0 Axes>
Out[192]:
<Axes: >
Out[192]:
<Axes: >
Out[192]:
Text(0.5, 1.0, '因子载荷热力图')
Out[192]:
Text(0.5, 38.72222222222221, 'Factors')
Out[192]:
Text(122.72222222222221, 0.5, 'Variables')
Out[192]:
<Axes: >
Out[192]:
<matplotlib.collections.PathCollection at 0x1959059e340>
Out[192]:
<matplotlib.lines.Line2D at 0x1958ff318e0>
Out[192]:
<matplotlib.lines.Line2D at 0x195c9d771c0>
Out[192]:
Text(0.5, 1.0, '因子得分散点图')
Out[192]:
Text(0.5, 0, 'Factor 1')
Out[192]:
Text(0, 0.5, 'Factor 2')
In [110]:
Communication_Services= ['GOOGL', 'GOOG', 'T', 'CHTR', 'CMCSA', 'EA', 'FOXA', 'FOX', 'IPG', 'LYV', 'MTCH', 'META', 'NFLX', 'NWSA', 'NWS', 'OMC', 'PARA',
'TMUS', 'TTWO', 'VZ', 'DIS', 'WBD']
Consumer_Discretionary= ['ABNB', 'AMZN', 'APTV', 'BBY', 'BWA', 'CZR', 'KMX', 'CCL', 'CMG', 'DHI', 'DRI', 'DECK', 'DPZ', 'EBAY', 'EXPE', 'F', 'GRMN',
'GM', 'GPC', 'HAS', 'HLT', 'HD', 'LVS', 'LEN', 'LKQ', 'LOW', 'LULU', 'MAR', 'MCD', 'MGM', 'MHK', 'NKE', 'NCLH', 'ORLY', 'PHM', 'RL',
'ROST', 'RCL', 'SBUX', 'TPR', 'TSLA', 'TJX', 'TSCO', 'ULTA', 'WYNN', 'YUM']
Consumer_Staples=['MO', 'ADM', 'BG', 'CPB', 'CHD', 'CLX', 'KO', 'CL', 'CAG', 'STZ', 'COST', 'DG', 'DLTR', 'EL', 'GIS', 'HSY', 'HRL', 'SJM', 'K', 'KVUE', 'KDP',
'KMB', 'KHC', 'KR', 'LW', 'MKC', 'TAP', 'MDLZ', 'MNST', 'PEP', 'PM', 'PG', 'SYY', 'TGT', 'TSN', 'WBA', 'WMT']
Energy=['APA', 'BKR', 'CVX', 'COP', 'CTRA', 'DVN', 'FANG', 'EOG', 'EQT', 'XOM', 'HAL', 'HES', 'KMI', 'MPC', 'OXY', 'OKE', 'PSX', 'SLB', 'TRGP', 'VLO', 'WMB']
Financials=['AFL', 'ALL', 'AXP', 'AIG', 'AMP', 'AON', 'ACGL', 'AJG', 'BAC', 'BLK', 'BX', 'BK', 'BRO', 'COF', 'CBOE', 'SCHW', 'CB', 'C', 'CFG', 'CME', 'CPAY',
'DFS', 'FIS', 'FITB', 'FI', 'BEN', 'GPN', 'GL', 'GS', 'HIG', 'HBAN', 'ICE', 'IVZ', 'JKHY', 'JPM', 'KEY', 'KKR', 'L', 'MTB', 'MKTX', 'MMC', 'MA', 'MET',
'MCO', 'MS', 'MSCI', 'NDAQ', 'NTRS', 'PYPL', 'PNC', 'PFG', 'PGR', 'PRU', 'RJF', 'RF', 'SPGI', 'STT', 'SYF', 'TROW', 'TRV', 'TFC', 'USB', 'V', 'WRB', 'WFC', 'WTW']
Health_Care=['ABT', 'ABBV', 'A', 'ALGN', 'AMGN', 'BAX', 'BDX', 'TECH', 'BIIB', 'BSX', 'BMY', 'CAH', 'CTLT', 'COR', 'CNC', 'CRL', 'CI', 'COO', 'CVS', 'DHR', 'DVA',
'DXCM', 'EW', 'ELV', 'GEHC', 'GILD', 'HCA', 'HSIC', 'HOLX', 'HUM', 'IDXX', 'INCY', 'PODD', 'ISRG', 'IQV', 'JNJ', 'LH', 'LLY', 'MCK', 'MDT', 'MRK',
'MRNA', 'MOH', 'PFE', 'DGX', 'REGN', 'RMD', 'RVTY', 'SOLV', 'STE', 'SYK', 'TFX', 'TMO', 'UNH', 'UHS', 'VRTX', 'VTRS', 'WAT', 'WST', 'ZBH', 'ZTS']
Industrials=['MMM', 'AOS', 'ALLE', 'AMTM', 'AME', 'ADP', 'AXON', 'BA', 'BLDR', 'CHRW', 'CARR', 'CAT', 'CTAS', 'CPRT', 'CSX', 'CMI', 'DAY', 'DE', 'DAL', 'DOV',
'ETN', 'EMR', 'EFX', 'EXPD', 'FAST', 'FDX', 'FTV', 'GE', 'GEV', 'GNRC', 'GD', 'HON', 'HWM', 'HII', 'IEX', 'ITW', 'IR', 'J', 'JCI', 'LHX', 'LDOS',
'LMT', 'MAS', 'NSC', 'NOC', 'ODFL', 'OTIS', 'PCAR', 'PH', 'PAYX', 'PAYC', 'PNR', 'PWR', 'RSG', 'ROK', 'ROL', 'RTX', 'LUV', 'SWK', 'TXT', 'TT', 'TDG',
'UBER', 'UNP', 'UAL', 'UPS', 'URI', 'VLTO', 'VRSK', 'WAB', 'WM', 'XYL']
Information_Technology=[ 'ACN', 'ADBE', 'AMD', 'AKAM', 'APH', 'ADI', 'ANSS', 'AAPL', 'AMAT', 'ANET', 'ADSK', 'AVGO', 'CDNS', 'CDW', 'CSCO', 'CTSH', 'GLW', 'CRWD',
'DELL', 'ENPH', 'EPAM', 'FFIV', 'FSLR', 'FTNT', 'IT', 'GEN', 'GDDY', 'HPE', 'HPQ', 'IBM', 'INTC', 'INTU', 'JBL', 'JNPR', 'KEYS', 'KLAC',
'LRCX', 'MCHP', 'MU', 'MSFT', 'MPWR', 'MSI', 'NTAP', 'NVDA', 'NXPI', 'ON', 'ORCL', 'PLTR', 'PANW', 'PTC', 'QRVO', 'QCOM', 'ROP', 'CRM',
'STX', 'NOW', 'SWKS', 'SMCI', 'SNPS', 'TEL', 'TDY', 'TER', 'TXN', 'TRMB', 'VRSN', 'WDC', 'ZBRA']
Materials=['APD', 'ALB', 'AMCR', 'AVY', 'BALL', 'CE', 'CF', 'CTVA', 'DOW', 'DD', 'EMN', 'ECL', 'FMC', 'FCX', 'IFF', 'IP', 'LIN', 'LYB', 'MLM', 'MOS', 'NEM', 'NUE',
'PKG', 'PPG', 'SHW', 'SW', 'STLD', 'VMC']
Real_Estate=['ARE', 'AMT', 'AVB', 'BXP', 'CPT', 'CBRE', 'CSGP', 'CCI', 'DLR', 'EQIX', 'EQR', 'ESS', 'EXR', 'FRT', 'DOC', 'HST', 'INVH', 'IRM', 'KIM', 'MAA', 'PLD',
'PSA', 'O', 'REG', 'SBAC', 'SPG', 'UDR', 'VTR', 'VICI', 'WELL', 'WY']
Utilities=['AES', 'LNT', 'AEE', 'AEP', 'AWK', 'ATO', 'CNP', 'CMS', 'ED', 'CEG', 'D', 'DTE', 'DUK', 'EIX', 'ETR', 'EVRG', 'ES', 'EXC', 'FE', 'NEE', 'NI', 'NRG',
'PCG', 'PNW', 'PPL', 'PEG', 'SRE', 'SO', 'VST', 'WEC', 'XEL']
Industry={'Communication_Services':Communication_Services,'Consumer_Discretionary':Consumer_Discretionary,
'Consumer_Staples':Consumer_Staples,'Energy':Energy,
'Financials':Financials,'Health_Care':Health_Care,'Industrials':Industrials,
'Information_Technology':Information_Technology,'Materials':Materials,
'Real_Estate':Real_Estate,'Utilities':Utilities}
industry_dict=Industry
# 计算最长的列表长度
max_length = max(len(lst) for lst in industry_dict.values())
# 填充 NaN 以使所有列表长度相同
for key in industry_dict:
industry_dict[key] += [np.nan] * (max_length - len(industry_dict[key]))
# 转换为 DataFrame
df_industries = pd.DataFrame(industry_dict)
# 函数:根据公司名称查找行业
def find_company_industry(company_name, industry_dict):
for industry, companies in industry_dict.items():
if company_name in companies:
return industry
return None
#因子1的行业占比
sort_df = pd.DataFrame(factor_loadings.T,columns=columns ).T
sort_df=sort_df.sort_values(by=[0,1], ascending=[False,False]).T
sort_df_numpy=sort_df.to_numpy().T
columns1=sort_df.columns[:30]
# 检索每个公司的行业
company_industry_map = {company: find_company_industry(company, industry_dict) for company in columns1}
# 输出结果
for company, industry in company_industry_map.items():
print(f"公司 {company} 所在行业: {industry}")
公司 ATO 所在行业: Utilities 公司 CSX 所在行业: Industrials 公司 NSC 所在行业: Industrials 公司 CCI 所在行业: Real_Estate 公司 ED 所在行业: Utilities 公司 CI 所在行业: Health_Care 公司 PHM 所在行业: Consumer_Discretionary 公司 SO 所在行业: Utilities 公司 DHI 所在行业: Consumer_Discretionary 公司 LEN 所在行业: Consumer_Discretionary 公司 CARR 所在行业: Industrials 公司 CVS 所在行业: Health_Care 公司 LNT 所在行业: Utilities 公司 FE 所在行业: Utilities 公司 HAL 所在行业: Energy 公司 ODFL 所在行业: Industrials 公司 GNRC 所在行业: Industrials 公司 HOLX 所在行业: Health_Care 公司 WEC 所在行业: Utilities 公司 DTE 所在行业: Utilities 公司 BKR 所在行业: Energy 公司 SLB 所在行业: Energy 公司 MAA 所在行业: Real_Estate 公司 CTVA 所在行业: Materials 公司 MDT 所在行业: Health_Care 公司 CPAY 所在行业: Financials 公司 SRE 所在行业: Utilities 公司 HCA 所在行业: Health_Care 公司 NUE 所在行业: Materials 公司 LMT 所在行业: Industrials
独立成分分析 ¶
In [194]:
from sklearn.decomposition import FastICA
# 数据准备
X = x_data.to_numpy() # 观测信号
time=np.arange(len(X))
# 应用 ICA
ica = FastICA(n_components=2, random_state=42)
S_estimated = ica.fit_transform(X) # 分离出的信号
A_estimated = ica.mixing_ # 估计的混合矩阵
# 绘图分析
fig, axes = plt.subplots(2, 2, figsize=(12, 8), constrained_layout=True)
# 混合信号
axes[0, 0].plot(time, X[:, 0], color='green')
axes[0, 0].set_title('股票A')
axes[0, 1].plot(time, X[:, 99], color='purple')
axes[0, 1].set_title('股票B')
# 分离信号
axes[1, 0].plot(time, S_estimated[:, 0], color='orange')
axes[1, 0].set_title('提取特征A')
axes[1, 1].plot(time, S_estimated[:, 1], color='cyan')
axes[1, 1].set_title('提取特征B')
plt.suptitle('Independent Component Analysis (ICA) Results', fontsize=16)
plt.show()
Out[194]:
[<matplotlib.lines.Line2D at 0x195904fb100>]
Out[194]:
Text(0.5, 1.0, '股票A')
Out[194]:
[<matplotlib.lines.Line2D at 0x195904fb4c0>]
Out[194]:
Text(0.5, 1.0, '股票B')
Out[194]:
[<matplotlib.lines.Line2D at 0x195904fb730>]
Out[194]:
Text(0.5, 1.0, '提取特征A')
Out[194]:
[<matplotlib.lines.Line2D at 0x195904fb9a0>]
Out[194]:
Text(0.5, 1.0, '提取特征B')
Out[194]:
Text(0.5, 0.98, 'Independent Component Analysis (ICA) Results')
多维尺度分析 ¶
In [196]:
from sklearn.manifold import MDS
from sklearn.metrics import pairwise_distances
data=x_data
distance_matrix = pairwise_distances(data, metric='euclidean')
# MDS降维
mds = MDS(n_components=6, dissimilarity='precomputed', random_state=42)
low_dim_data = mds.fit_transform(distance_matrix)
# 绘图
fig, ax = plt.subplots(1, 2, figsize=(16, 8), dpi=120)
# 图1:MDS降维后的散点图
scatter = ax[0].scatter(low_dim_data[:, 0], low_dim_data[:, 1], cmap='viridis', s=50, edgecolor='k')
ax[0].set_title("MDS Projection (2D)", fontsize=14)
ax[0].set_xlabel("Component 1")
ax[0].set_ylabel("Component 2")
plt.colorbar(scatter, ax=ax[0], label="Cluster Labels")
# 图2:距离矩阵的热力图
im = ax[1].imshow(distance_matrix, cmap='hot', interpolation='nearest')
ax[1].set_title("Distance Matrix Heatmap", fontsize=14)
ax[1].set_xlabel("Sample Index")
ax[1].set_ylabel("Sample Index")
plt.colorbar(im, ax=ax[1], label="Distance")
plt.tight_layout()
plt.show()
C:\Users\cqm\AppData\Local\Temp\ipykernel_30380\2286233253.py:14: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored scatter = ax[0].scatter(low_dim_data[:, 0], low_dim_data[:, 1], cmap='viridis', s=50, edgecolor='k')
Out[196]:
Text(0.5, 1.0, 'MDS Projection (2D)')
Out[196]:
Text(0.5, 0, 'Component 1')
Out[196]:
Text(0, 0.5, 'Component 2')
Out[196]:
<matplotlib.colorbar.Colorbar at 0x195b3ccd9d0>
Out[196]:
Text(0.5, 1.0, 'Distance Matrix Heatmap')
Out[196]:
Text(0.5, 0, 'Sample Index')
Out[196]:
Text(0, 0.5, 'Sample Index')
Out[196]:
<matplotlib.colorbar.Colorbar at 0x195b3d12100>
t-SNE ¶
In [200]:
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
X=x_data
# 使用PCA降维到3D空间(便于对比)
pca = PCA(n_components=3)
X_pca1 = pca.fit_transform(X)
# 使用t-SNE降维到2D空间
tsne = TSNE(n_components=2, random_state=42, perplexity=30, n_iter=1000)
X_tsne = tsne.fit_transform(X)
fig = plt.figure(figsize=(16, 8))
# 原始PCA 3D降维
ax1 = fig.add_subplot(1, 2, 1, projection='3d')
scatter = ax1.scatter(X_pca1[:, 0], X_pca1[:, 1], X_pca1[:, 2], cmap='rainbow', s=10)
ax1.set_title('PCA 3D Visualization')
ax1.set_xlabel('PCA1')
ax1.set_ylabel('PCA2')
ax1.set_zlabel('PCA3')
legend1 = ax1.legend(*scatter.legend_elements(), title="Classes", loc="best")
ax1.add_artist(legend1)
# t-SNE 2D降维
ax2 = fig.add_subplot(1, 2, 2)
scatter = ax2.scatter(X_tsne[:, 0], X_tsne[:, 1], cmap='rainbow', s=10)
ax2.set_title('t-SNE 2D Visualization')
ax2.set_xlabel('t-SNE1')
ax2.set_ylabel('t-SNE2')
legend2 = ax2.legend(*scatter.legend_elements(), title="Classes", loc="best")
ax2.add_artist(legend2)
plt.tight_layout()
plt.show()
c:\anaconda\envs\pytorch\lib\site-packages\sklearn\manifold\_t_sne.py:1162: FutureWarning: 'n_iter' was renamed to 'max_iter' in version 1.5 and will be removed in 1.7. warnings.warn( C:\Users\cqm\AppData\Local\Temp\ipykernel_30380\647746706.py:18: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored scatter = ax1.scatter(X_pca1[:, 0], X_pca1[:, 1], X_pca1[:, 2], cmap='rainbow', s=10)
Out[200]:
Text(0.5, 0.92, 'PCA 3D Visualization')
Out[200]:
Text(0.5, 0, 'PCA1')
Out[200]:
Text(0.5, 0.5, 'PCA2')
Out[200]:
Text(0.5, 0, 'PCA3')
c:\anaconda\envs\pytorch\lib\site-packages\matplotlib\collections.py:1109: UserWarning: Collection without array used. Make sure to specify the values to be colormapped via the `c` argument.
warnings.warn("Collection without array used. Make sure to "
Out[200]:
<matplotlib.legend.Legend at 0x195cdab8460>
C:\Users\cqm\AppData\Local\Temp\ipykernel_30380\647746706.py:28: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored scatter = ax2.scatter(X_tsne[:, 0], X_tsne[:, 1], cmap='rainbow', s=10)
Out[200]:
Text(0.5, 1.0, 't-SNE 2D Visualization')
Out[200]:
Text(0.5, 0, 't-SNE1')
Out[200]:
Text(0, 0.5, 't-SNE2')
Out[200]:
<matplotlib.legend.Legend at 0x195cdaf0e80>
UMAP
In [207]:
import umap
# 第一步:生成数据集
X=x_data.to_numpy()
# 第二步:应用 UMAP 进行降维
reducer = umap.UMAP(n_neighbors=15, min_dist=0.1, n_components=2, random_state=42)
embedding = reducer.fit_transform(X)
# 第三步:可视化
fig, axs = plt.subplots(1, 2, figsize=(12, 6), dpi=120)
# UMAP 2D 嵌入
scatter = axs[0].scatter(embedding[:, 0], embedding[:, 1], cmap='Spectral', s=5)
axs[0].set_title("UMAP 2D Embedding")
axs[0].set_xlabel("UMAP1")
axs[0].set_ylabel("UMAP2")
# UMAP 嵌入的密度图
sns.kdeplot(x=embedding[:, 0], y=embedding[:, 1], cmap="Reds", fill=True, ax=axs[1])
axs[1].set_title("Density of UMAP Embedding")
axs[1].set_xlabel("UMAP1")
axs[1].set_ylabel("UMAP2")
# 为 UMAP 图添加颜色条
cbar = fig.colorbar(scatter, ax=axs[1], orientation='vertical', fraction=0.02, pad=0.04)
cbar.set_label("Color")
plt.tight_layout()
plt.show()
c:\anaconda\envs\pytorch\lib\site-packages\umap\umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism. warn( C:\Users\cqm\AppData\Local\Temp\ipykernel_30380\3946075921.py:14: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored scatter = axs[0].scatter(embedding[:, 0], embedding[:, 1], cmap='Spectral', s=5)
Out[207]:
Text(0.5, 1.0, 'UMAP 2D Embedding')
Out[207]:
Text(0.5, 0, 'UMAP1')
Out[207]:
Text(0, 0.5, 'UMAP2')
Out[207]:
<Axes: >
Out[207]:
Text(0.5, 1.0, 'Density of UMAP Embedding')
Out[207]:
Text(0.5, 0, 'UMAP1')
Out[207]:
Text(0, 0.5, 'UMAP2')
KPCA
In [208]:
from sklearn.decomposition import KernelPCA
X=x_data.to_numpy()
# 核PCA降维
kernel_pca = KernelPCA(n_components=2, kernel='rbf', gamma=15,fit_inverse_transform=True)
X_kpca = kernel_pca.fit_transform(X)
#投影后数据可视化
plt.subplot(1, 2, 1)
plt.scatter(X_kpca[:, 0], X_kpca[:, 1], cmap='plasma', edgecolor='k')
plt.title('Kernel PCA Projection')
plt.xlabel('PC1')
plt.ylabel('PC2')
# 核PCA没有显式的解释方差,因此直接基于特征值来估计
lambdas = kernel_pca.eigenvalues_
explained_variance_ratio = lambdas / np.sum(lambdas)
cumulative_variance_ratio = np.cumsum(explained_variance_ratio)
plt.subplot(1, 2, 2)
plt.plot(range(1, len(cumulative_variance_ratio) + 1), cumulative_variance_ratio, marker='o', color='red')
plt.title('Cumulative Variance Ratio')
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Variance')
# 总体展示
plt.tight_layout()
plt.show()
Out[208]:
<Axes: >
C:\Users\cqm\AppData\Local\Temp\ipykernel_30380\2229412370.py:12: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored plt.scatter(X_kpca[:, 0], X_kpca[:, 1], cmap='plasma', edgecolor='k')
Out[208]:
<matplotlib.collections.PathCollection at 0x195b4267310>
Out[208]:
Text(0.5, 1.0, 'Kernel PCA Projection')
Out[208]:
Text(0.5, 0, 'PC1')
Out[208]:
Text(0, 0.5, 'PC2')
Out[208]:
<Axes: >
Out[208]:
[<matplotlib.lines.Line2D at 0x195b445e640>]
Out[208]:
Text(0.5, 1.0, 'Cumulative Variance Ratio')
Out[208]:
Text(0.5, 0, 'Number of Components')
Out[208]:
Text(0, 0.5, 'Cumulative Variance')
自编码器
In [228]:
import torch
import torch.nn as nn
import torch.optim as optim
data = torch.tensor(x_data.to_numpy(), dtype=torch.float32)
# 2. 定义自编码器
class Autoencoder(nn.Module):
def __init__(self, input_dim, latent_dim):
super(Autoencoder, self).__init__()
# 编码器
self.encoder = nn.Sequential(
nn.Linear(input_dim, 8),
nn.ReLU(),
nn.Linear(8, latent_dim),
)
# 解码器
self.decoder = nn.Sequential(
nn.Linear(latent_dim, 8),
nn.ReLU(),
nn.Linear(8, input_dim),
)
def forward(self, x):
z = self.encoder(x)
x_hat = self.decoder(z)
return x_hat, z
# 3. 初始化模型
input_dim = 482
latent_dim = 2
model = Autoencoder(input_dim, latent_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
# 4. 训练模型
epochs = 100
losses = []
for epoch in range(epochs):
model.train()
optimizer.zero_grad()
x_hat, z = model(data)
loss = criterion(x_hat, data)
loss.backward()
optimizer.step()
losses.append(loss.item())
if (epoch + 1) % 10 == 0:
print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")
# 5. 可视化分析
model.eval()
with torch.no_grad():
x_hat, z = model(data)
# 创建子图
fig, axs = plt.subplots(1, 2, figsize=(12, 6))
# 图1:原始数据 vs 重建数据
axs[0].scatter(data[:, 0], data[:, 99], color='blue', alpha=0.5, label='Original Data')
axs[0].scatter(x_hat[:, 0], x_hat[:, 99], color='red', alpha=0.5, label='Reconstructed Data')
axs[0].set_title('原始数据和重构后的数据')
axs[0].legend()
axs[0].grid(False)
# 图2:潜在空间分布
axs[1].scatter(z[:, 0], z[:, 1], color='green', alpha=0.7)
axs[1].set_title('潜在空间分布')
axs[1].grid(False)
plt.tight_layout()
plt.show()
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Epoch 10/100, Loss: 0.9969
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Epoch 20/100, Loss: 0.8073
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Epoch 30/100, Loss: 0.5334
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Epoch 40/100, Loss: 0.3568
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Epoch 50/100, Loss: 0.3023
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Epoch 60/100, Loss: 0.2743
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Epoch 70/100, Loss: 0.2501
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Epoch 80/100, Loss: 0.2236
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Epoch 90/100, Loss: 0.1968
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Epoch 100/100, Loss: 0.1740
Out[228]:
Autoencoder(
(encoder): Sequential(
(0): Linear(in_features=482, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=2, bias=True)
)
(decoder): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=482, bias=True)
)
)
Out[228]:
<matplotlib.collections.PathCollection at 0x195cfe22f40>
Out[228]:
<matplotlib.collections.PathCollection at 0x195cfe22f70>
Out[228]:
Text(0.5, 1.0, '原始数据和重构后的数据')
Out[228]:
<matplotlib.legend.Legend at 0x195cfe3b6a0>
Out[228]:
<matplotlib.collections.PathCollection at 0x195c47c3070>
Out[228]:
Text(0.5, 1.0, '潜在空间分布')
In [135]:
z.shape
Out[135]:
torch.Size([2340, 2])
比较各种方法下特征与y的xicor相关系数¶
In [172]:
def reduction_xicor(x,y):
x=np.array(x);y=np.array(y)
p=x.shape[1];rho=[]
for i in range(p):
rho.append(max(Xi(list(x[:,i]),list(y)).correlation,Xi(list(y),list(x[:,i])).correlation))
return rho
In [217]:
def reduction_pearsonr(x,y):
x=np.array(x);y=np.array(y)
p=x.shape[1];rho=[]
for i in range(p):
rho.append(pearsonr(x[:,i],y)[0])
return rho
In [203]:
reduction_xicor(X_pca,y_data)
Out[203]:
[0.7459004048727824, 0.7481674245395772, 0.5584143721635395, 0.4331119547901775, 0.30003493681695825, 0.4064567182512817]
In [218]:
reduction_pearsonr(X_pca,y_data)
Out[218]:
[0.7083096789491002, -0.6320078786457969, -0.037307291010294424, 0.0923318900143941, 0.14906052044247192, 0.19183080659713656]
In [204]:
reduction_xicor(factor_scores,y_data)
Out[204]:
[0.7484501171804825, 0.7617224271001951, 0.5827116950785585, 0.5674030120072084, 0.38946990871553455, 0.3844826839949381]
In [219]:
reduction_pearsonr(factor_scores,y_data)
Out[219]:
[-0.7343696011917709, -0.6092612746411366, 0.0542417977173028, -0.0623710314881693, -0.12528733045690876, 0.18332668381712192]
In [195]:
ica = FastICA(n_components=6, random_state=42)
S_estimated1 = ica.fit_transform(X) # 分离出的信号
reduction_xicor(S_estimated1,y_data)
Out[195]:
[0.4414496021348532, 0.42401534517045536, 0.3039451939413387, 0.7065951707078415, 0.5546470806207686, 0.3306891173002259]
In [220]:
reduction_pearsonr(S_estimated1,y_data)
Out[220]:
[0.2972723875824521, 0.07324544942087362, 0.38351719475898216, -0.8161480348066641, -0.18469031486676912, 0.1704181486629671]
In [201]:
reduction_xicor(low_dim_data,y_data)
Out[201]:
[0.5367208226899011, 0.4674407311419262, 0.4466205432501539, 0.7051943081442855, 0.450000995324895, 0.340893480329732]
In [221]:
reduction_pearsonr(low_dim_data,y_data)
Out[221]:
[0.6005599523194682, 0.13558474452281036, -0.07688185949302231, -0.8578777562439068, -0.07554015165043973, 0.02540276577731806]
In [202]:
tsne1 = TSNE(n_components=3, random_state=42, perplexity=30, n_iter=1000)
X_tsne1 = tsne1.fit_transform(X)
reduction_xicor(X_tsne1,y_data)
c:\anaconda\envs\pytorch\lib\site-packages\sklearn\manifold\_t_sne.py:1162: FutureWarning: 'n_iter' was renamed to 'max_iter' in version 1.5 and will be removed in 1.7. warnings.warn(
Out[202]:
[0.48284178589410953, 0.46828611810324317, 0.36653752565520215]
In [222]:
reduction_pearsonr(X_tsne1,y_data)
Out[222]:
[0.6122914509078293, -0.4863056001563527, 0.006704770922268464]
In [210]:
reducer1 = umap.UMAP(n_neighbors=15, min_dist=0.1, n_components=6, random_state=42)
embedding1 = reducer1.fit_transform(X)
reduction_xicor(embedding1,y_data)
c:\anaconda\envs\pytorch\lib\site-packages\umap\umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism. warn(
Out[210]:
[0.758938233299651, 0.4563471503300368, 0.38572309623111556, 0.34759302132972125, 0.34411504567810747, 0.34324007291257097]
In [223]:
reduction_pearsonr(embedding1,y_data)
Out[223]:
[-0.6084088004336915, -0.15193324811513242, 0.1299335100577593, -0.020454157224254456, 0.16963724734130026, -0.10434447647008584]
In [211]:
kernel_pca1 = KernelPCA(n_components=6, kernel='rbf', gamma=15,fit_inverse_transform=True)
X_kpca1 = kernel_pca1.fit_transform(X)
reduction_xicor(X_kpca1,y_data)
Out[211]:
[0.26596907307594597, 0.2773929809280544, 0.27674134160613006, 0.2846975720741751, 0.2618489896450399, 0.2733491096369587]
In [224]:
reduction_pearsonr(X_kpca1,y_data)
Out[224]:
[-0.034943145721764504, -0.03150887538542018, 0.025142330088421656, -0.02199287558944993, -0.024738561667554088, -0.043182417360195686]
In [227]:
reduction_xicor(z,y_data)
Out[227]:
[0.6328036257682754, 0.6473454319792227, 0.6354453775404563, 0.7953190230515058, 0.6747451331658447, 0.7394576423591268]
In [226]:
reduction_pearsonr(z,y_data)
Out[226]:
[0.4464784643900447, 0.7340306549811457, 0.7267903125083403, -0.7457705131219041, -0.7375254950328844, 0.8882634058457526]
In [24]:
#导出依赖包
import pipreqs
!pipreqs --encoding=utf8
INFO: Not scanning for jupyter notebooks. WARNING: Import named "matplotlib" not found locally. Trying to resolve it at the PyPI server. WARNING: Import named "matplotlib" was resolved to "matplotlib:3.10.0" package (https://pypi.org/project/matplotlib/). Please, verify manually the final list of requirements.txt to avoid possible dependency confusions. WARNING: Import named "numpy" not found locally. Trying to resolve it at the PyPI server. WARNING: Import named "numpy" was resolved to "numpy:2.2.1" package (https://pypi.org/project/numpy/). Please, verify manually the final list of requirements.txt to avoid possible dependency confusions. WARNING: Import named "pandas" not found locally. Trying to resolve it at the PyPI server. WARNING: Import named "pandas" was resolved to "pandas:2.2.3" package (https://pypi.org/project/pandas/). Please, verify manually the final list of requirements.txt to avoid possible dependency confusions. WARNING: Import named "scikit_learn" not found locally. Trying to resolve it at the PyPI server. WARNING: Import named "scikit_learn" was resolved to "scikit-learn:1.6.0" package (https://pypi.org/project/scikit-learn/). Please, verify manually the final list of requirements.txt to avoid possible dependency confusions. WARNING: Import named "scipy" not found locally. Trying to resolve it at the PyPI server. WARNING: Import named "scipy" was resolved to "scipy:1.14.1" package (https://pypi.org/project/scipy/). Please, verify manually the final list of requirements.txt to avoid possible dependency confusions. WARNING: Import named "seaborn" not found locally. Trying to resolve it at the PyPI server. WARNING: Import named "seaborn" was resolved to "seaborn:0.13.2" package (https://pypi.org/project/seaborn/). Please, verify manually the final list of requirements.txt to avoid possible dependency confusions. WARNING: Import named "yfinance" not found locally. Trying to resolve it at the PyPI server. WARNING: Import named "yfinance" was resolved to "yfinance:0.2.51" package (https://pypi.org/project/yfinance/). Please, verify manually the final list of requirements.txt to avoid possible dependency confusions. INFO: Successfully saved requirements file in c:\Users\cqm\Desktop\多元统计陈麒名\requirements.txt